[llvm] 1a53b5c - Revert "[llvm-profdata] Refactoring Sample Profile Reader to increase FDO build speed using MD5 as key to Sample Profile map"
Aaron Ballman via llvm-commits
llvm-commits at lists.llvm.org
Fri Jul 28 06:41:48 PDT 2023
Author: Aaron Ballman
Date: 2023-07-28T09:41:38-04:00
New Revision: 1a53b5c367b5ebf7d7f34afaa653ea337982f1d6
URL: https://github.com/llvm/llvm-project/commit/1a53b5c367b5ebf7d7f34afaa653ea337982f1d6
DIFF: https://github.com/llvm/llvm-project/commit/1a53b5c367b5ebf7d7f34afaa653ea337982f1d6.diff
LOG: Revert "[llvm-profdata] Refactoring Sample Profile Reader to increase FDO build speed using MD5 as key to Sample Profile map"
This reverts commit 66ba71d913df7f7cd75e92c0c4265932b7c93292.
Addressing issues found by:
https://lab.llvm.org/buildbot/#/builders/245/builds/11732
https://lab.llvm.org/buildbot/#/builders/187/builds/12251
https://lab.llvm.org/buildbot/#/builders/186/builds/11099
https://lab.llvm.org/buildbot/#/builders/182/builds/6976
Added:
Modified:
llvm/include/llvm/ProfileData/SampleProf.h
llvm/include/llvm/ProfileData/SampleProfReader.h
llvm/lib/ProfileData/ProfileSummaryBuilder.cpp
llvm/lib/ProfileData/SampleProf.cpp
llvm/lib/ProfileData/SampleProfReader.cpp
llvm/lib/ProfileData/SampleProfWriter.cpp
llvm/lib/Transforms/IPO/SampleContextTracker.cpp
llvm/test/tools/llvm-profdata/Inputs/sample-nametable-after-samples.profdata
llvm/test/tools/llvm-profdata/sample-nametable.test
llvm/tools/llvm-profdata/llvm-profdata.cpp
llvm/tools/llvm-profgen/ProfileGenerator.cpp
llvm/unittests/tools/llvm-profdata/CMakeLists.txt
llvm/unittests/tools/llvm-profdata/OutputSizeLimitTest.cpp
Removed:
llvm/unittests/tools/llvm-profdata/MD5CollisionTest.cpp
################################################################################
diff --git a/llvm/include/llvm/ProfileData/SampleProf.h b/llvm/include/llvm/ProfileData/SampleProf.h
index 76d6b6c599d077..12cc1f2fd002b9 100644
--- a/llvm/include/llvm/ProfileData/SampleProf.h
+++ b/llvm/include/llvm/ProfileData/SampleProf.h
@@ -318,14 +318,6 @@ struct LineLocationHash {
raw_ostream &operator<<(raw_ostream &OS, const LineLocation &Loc);
-static inline hash_code hashFuncName(StringRef F) {
- // If function name is already MD5 string, do not hash again.
- uint64_t Hash;
- if (F.getAsInteger(10, Hash))
- Hash = MD5Hash(F);
- return Hash;
-}
-
/// Representation of a single sample record.
///
/// A sample record is represented by a positive integer value, which
@@ -638,13 +630,9 @@ class SampleContext {
return getContextString(FullContext, false);
}
- hash_code getHashCode() const {
- if (hasContext())
- return hash_value(getContextFrames());
-
- // For non-context function name, use its MD5 as hash value, so that it is
- // consistent with the profile map's key.
- return hashFuncName(getName());
+ uint64_t getHashCode() const {
+ return hasContext() ? hash_value(getContextFrames())
+ : hash_value(getName());
}
/// Set the name of the function and clear the current context.
@@ -722,12 +710,9 @@ class SampleContext {
uint32_t Attributes;
};
-static inline hash_code hash_value(const SampleContext &Context) {
- return Context.getHashCode();
-}
-
-inline raw_ostream &operator<<(raw_ostream &OS, const SampleContext &Context) {
- return OS << Context.toString();
+static inline hash_code hash_value(const SampleContext &arg) {
+ return arg.hasContext() ? hash_value(arg.getContextFrames())
+ : hash_value(arg.getName());
}
class FunctionSamples;
@@ -1221,9 +1206,6 @@ class FunctionSamples {
return !(*this == Other);
}
- template <typename T>
- const T &getKey() const;
-
private:
/// CFG hash value for the function.
uint64_t FunctionHash = 0;
@@ -1287,176 +1269,12 @@ class FunctionSamples {
const LocToLocMap *IRToProfileLocationMap = nullptr;
};
-template <>
-inline const SampleContext &FunctionSamples::getKey<SampleContext>() const {
- return getContext();
-}
-
raw_ostream &operator<<(raw_ostream &OS, const FunctionSamples &FS);
-/// This class is a wrapper to associative container MapT<KeyT, ValueT> using
-/// the hash value of the original key as the new key. This greatly improves the
-/// performance of insert and query operations especially when hash values of
-/// keys are available a priori, and reduces memory usage if KeyT has a large
-/// size.
-/// When performing any action, if an existing entry with a given key is found,
-/// and the interface "KeyT ValueT::getKey<KeyT>() const" to retrieve a value's
-/// original key exists, this class checks if the given key actually matches
-/// the existing entry's original key. If they do not match, this class behaves
-/// as if the entry did not exist (for insertion, this means the new value will
-/// replace the existing entry's value, as if it is newly inserted). If
-/// ValueT::getKey<KeyT>() is not available, all keys with the same hash value
-/// are considered equivalent (i.e. hash collision is silently ignored). Given
-/// such feature this class should only be used where it does not affect
-/// compilation correctness, for example, when loading a sample profile.
-/// Assuming the hashing algorithm is uniform, the probability of hash collision
-/// with 1,000,000 entries is
-/// (2^64)!/((2^64-1000000)!*(2^64)^1000000) ~= 3*10^-8.
-template <template <typename, typename, typename...> typename MapT,
- typename KeyT, typename ValueT, typename... MapTArgs>
-class HashKeyMap : public MapT<hash_code, ValueT, MapTArgs...> {
-public:
- using base_type = MapT<hash_code, ValueT, MapTArgs...>;
- using key_type = hash_code;
- using original_key_type = KeyT;
- using mapped_type = ValueT;
- using value_type = typename base_type::value_type;
+using SampleProfileMap =
+ std::unordered_map<SampleContext, FunctionSamples, SampleContext::Hash>;
- using iterator = typename base_type::iterator;
- using const_iterator = typename base_type::const_iterator;
-
-private:
- // If the value type has getKey(), retrieve its original key for comparison.
- template <typename U = mapped_type,
- typename = decltype(U().template getKey<original_key_type>())>
- static bool
- CheckKeyMatch(const original_key_type &Key, const mapped_type &ExistingValue,
- original_key_type *ExistingKeyIfDifferent = nullptr) {
- const original_key_type &ExistingKey =
- ExistingValue.template getKey<original_key_type>();
- bool Result = (Key == ExistingKey);
- if (!Result && ExistingKeyIfDifferent)
- *ExistingKeyIfDifferent = ExistingKey;
- return Result;
- }
-
- // If getKey() does not exist, this overload is selected, which assumes all
- // keys with the same hash are equivalent.
- static bool CheckKeyMatch(...) { return true; }
-
-public:
- template <typename... Ts>
- std::pair<iterator, bool> try_emplace(const key_type &Hash,
- const original_key_type &Key,
- Ts &&...Args) {
- assert(Hash == hash_value(Key));
- auto Ret = base_type::try_emplace(Hash, std::forward<Ts>(Args)...);
- if (!Ret.second) {
- original_key_type ExistingKey;
- if (LLVM_UNLIKELY(!CheckKeyMatch(Key, Ret.first->second, &ExistingKey))) {
- dbgs() << "MD5 collision detected: " << Key << " and " << ExistingKey
- << " has same hash value " << Hash << "\n";
- Ret.second = true;
- Ret.first->second = mapped_type(std::forward<Ts>(Args)...);
- }
- }
- return Ret;
- }
-
- template <typename... Ts>
- std::pair<iterator, bool> try_emplace(const original_key_type &Key,
- Ts &&...Args) {
- key_type Hash = hash_value(Key);
- return try_emplace(Hash, Key, std::forward<Ts>(Args)...);
- }
-
- template <typename... Ts> std::pair<iterator, bool> emplace(Ts &&...Args) {
- return try_emplace(std::forward<Ts>(Args)...);
- }
-
- mapped_type &operator[](const original_key_type &Key) {
- return try_emplace(Key, mapped_type()).first->second;
- }
-
- iterator find(const original_key_type &Key) {
- key_type Hash = hash_value(Key);
- auto It = base_type::find(Hash);
- if (It != base_type::end())
- if (LLVM_LIKELY(CheckKeyMatch(Key, It->second)))
- return It;
- return base_type::end();
- }
-
- const_iterator find(const original_key_type &Key) const {
- key_type Hash = hash_value(Key);
- auto It = base_type::find(Hash);
- if (It != base_type::end())
- if (LLVM_LIKELY(CheckKeyMatch(Key, It->second)))
- return It;
- return base_type::end();
- }
-
- size_t erase(const original_key_type &Ctx) {
- auto It = find(Ctx);
- if (It != base_type::end()) {
- base_type::erase(It);
- return 1;
- }
- return 0;
- }
-};
-
-/// This class provides operator overloads to the map container using MD5 as the
-/// key type, so that existing code can still work in most cases using
-/// SampleContext as key.
-/// Note: when populating container, make sure to assign the SampleContext to
-/// the mapped value immediately because the key no longer holds it.
-class SampleProfileMap
- : public HashKeyMap<DenseMap, SampleContext, FunctionSamples> {
-public:
- // Convenience method because this is being used in many places. Set the
- // FunctionSamples' context if its newly inserted.
- mapped_type &Create(const SampleContext &Ctx) {
- auto Ret = try_emplace(Ctx, FunctionSamples());
- if (Ret.second)
- Ret.first->second.setContext(Ctx);
- return Ret.first->second;
- }
-
- iterator find(const SampleContext &Ctx) {
- return HashKeyMap<llvm::DenseMap, SampleContext, FunctionSamples>::find(
- Ctx);
- }
-
- const_iterator find(const SampleContext &Ctx) const {
- return HashKeyMap<llvm::DenseMap, SampleContext, FunctionSamples>::find(
- Ctx);
- }
-
- // Overloaded find() to lookup a function by name. This is called by IPO
- // passes with an actual function name, and it is possible that the profile
- // reader converted function names in the profile to MD5 strings, so we need
- // to check if either representation matches.
- iterator find(StringRef Fname) {
- hash_code Hash = hashFuncName(Fname);
- auto It = base_type::find(Hash);
- if (It != end()) {
- StringRef CtxName = It->second.getContext().getName();
- if (LLVM_LIKELY(CtxName == Fname || CtxName == std::to_string(Hash)))
- return It;
- }
- return end();
- }
-
- size_t erase(const SampleContext &Ctx) {
- return HashKeyMap<llvm::DenseMap, SampleContext, FunctionSamples>::erase(
- Ctx);
- }
-
- size_t erase(const key_type &Key) { return base_type::erase(Key); }
-};
-
-using NameFunctionSamples = std::pair<hash_code, const FunctionSamples *>;
+using NameFunctionSamples = std::pair<SampleContext, const FunctionSamples *>;
void sortFuncProfiles(const SampleProfileMap &ProfileMap,
std::vector<NameFunctionSamples> &SortedProfiles);
@@ -1502,6 +1320,8 @@ class SampleContextTrimmer {
bool MergeColdContext,
uint32_t ColdContextFrameLength,
bool TrimBaseProfileOnly);
+ // Canonicalize context profile name and attributes.
+ void canonicalizeContextProfiles();
private:
SampleProfileMap &ProfileMap;
@@ -1547,12 +1367,12 @@ class ProfileConverter {
SampleProfileMap &OutputProfiles,
bool ProfileIsCS = false) {
if (ProfileIsCS) {
- for (const auto &I : InputProfiles) {
- // Retain the profile name and clear the full context for each function
- // profile.
- FunctionSamples &FS = OutputProfiles.Create(I.second.getName());
- FS.merge(I.second);
- }
+ for (const auto &I : InputProfiles)
+ OutputProfiles[I.second.getName()].merge(I.second);
+ // Retain the profile name and clear the full context for each function
+ // profile.
+ for (auto &I : OutputProfiles)
+ I.second.setContext(SampleContext(I.first));
} else {
for (const auto &I : InputProfiles)
flattenNestedProfile(OutputProfiles, I.second);
diff --git a/llvm/include/llvm/ProfileData/SampleProfReader.h b/llvm/include/llvm/ProfileData/SampleProfReader.h
index d32516252eddbc..e14b0bfc7912aa 100644
--- a/llvm/include/llvm/ProfileData/SampleProfReader.h
+++ b/llvm/include/llvm/ProfileData/SampleProfReader.h
@@ -347,7 +347,7 @@ class SampleProfileReader {
public:
SampleProfileReader(std::unique_ptr<MemoryBuffer> B, LLVMContext &C,
SampleProfileFormat Format = SPF_None)
- : Profiles(), Ctx(C), Buffer(std::move(B)), Format(Format) {}
+ : Profiles(0), Ctx(C), Buffer(std::move(B)), Format(Format) {}
virtual ~SampleProfileReader() = default;
@@ -383,8 +383,8 @@ class SampleProfileReader {
/// The implementaion to read sample profiles from the associated file.
virtual std::error_code readImpl() = 0;
- /// Print the profile for \p FunctionSamples on stream \p OS.
- void dumpFunctionProfile(const FunctionSamples &FS, raw_ostream &OS = dbgs());
+ /// Print the profile for \p FContext on stream \p OS.
+ void dumpFunctionProfile(SampleContext FContext, raw_ostream &OS = dbgs());
/// Collect functions with definitions in Module M. For reader which
/// support loading function profiles on demand, return true when the
@@ -410,15 +410,23 @@ class SampleProfileReader {
/// Return the samples collected for function \p F, create empty
/// FunctionSamples if it doesn't exist.
FunctionSamples *getOrCreateSamplesFor(const Function &F) {
+ std::string FGUID;
StringRef CanonName = FunctionSamples::getCanonicalFnName(F);
- auto it = Profiles.find(CanonName);
- if (it != Profiles.end())
- return &it->second;
+ CanonName = getRepInFormat(CanonName, useMD5(), FGUID);
+ auto It = Profiles.find(CanonName);
+ if (It != Profiles.end())
+ return &It->second;
+ if (!FGUID.empty()) {
+ assert(useMD5() && "New name should only be generated for md5 profile");
+ CanonName = *MD5NameBuffer.insert(FGUID).first;
+ }
return &Profiles[CanonName];
}
/// Return the samples collected for function \p F.
- FunctionSamples *getSamplesFor(StringRef Fname) {
+ virtual FunctionSamples *getSamplesFor(StringRef Fname) {
+ std::string FGUID;
+ Fname = getRepInFormat(Fname, useMD5(), FGUID);
auto It = Profiles.find(Fname);
if (It != Profiles.end())
return &It->second;
@@ -646,16 +654,15 @@ class SampleProfileReaderBinary : public SampleProfileReader {
/// Read the whole name table.
std::error_code readNameTable();
- /// Read a string indirectly via the name table. Optionally return the index.
- ErrorOr<StringRef> readStringFromTable(size_t *RetIdx = nullptr);
+ /// Read a string indirectly via the name table.
+ ErrorOr<StringRef> readStringFromTable();
- /// Read a context indirectly via the CSNameTable. Optionally return the
- /// index.
- ErrorOr<SampleContextFrames> readContextFromTable(size_t *RetIdx = nullptr);
+ /// Read a context indirectly via the CSNameTable.
+ ErrorOr<SampleContextFrames> readContextFromTable();
/// Read a context indirectly via the CSNameTable if the profile has context,
- /// otherwise same as readStringFromTable, also return its hash value.
- ErrorOr<std::pair<SampleContext, hash_code>> readSampleContextFromTable();
+ /// otherwise same as readStringFromTable.
+ ErrorOr<SampleContext> readSampleContextFromTable();
/// Points to the current location in the buffer.
const uint8_t *Data = nullptr;
@@ -675,24 +682,13 @@ class SampleProfileReaderBinary : public SampleProfileReader {
/// the lifetime of MD5StringBuf is not shorter than that of NameTable.
std::vector<std::string> MD5StringBuf;
- /// The starting address of fixed length MD5 name table section.
+ /// The starting address of NameTable containing fixed length MD5.
const uint8_t *MD5NameMemStart = nullptr;
/// CSNameTable is used to save full context vectors. It is the backing buffer
/// for SampleContextFrames.
std::vector<SampleContextFrameVector> CSNameTable;
- /// Table to cache MD5 values of sample contexts corresponding to
- /// readSampleContextFromTable(), used to index into Profiles or
- /// FuncOffsetTable.
- std::vector<hash_code> MD5SampleContextTable;
-
- /// The starting address of the table of MD5 values of sample contexts. For
- /// fixed length MD5 non-CS profile it is same as MD5NameMemStart because
- /// hashes of non-CS contexts are already in the profile. Otherwise it points
- /// to the start of MD5SampleContextTable.
- const hash_code *MD5SampleContextStart = nullptr;
-
private:
std::error_code readSummaryEntry(std::vector<ProfileSummaryEntry> &Entries);
virtual std::error_code verifySPMagic(uint64_t Magic) = 0;
@@ -766,10 +762,10 @@ class SampleProfileReaderExtBinaryBase : public SampleProfileReaderBinary {
std::unique_ptr<ProfileSymbolList> ProfSymList;
- /// The table mapping from a function context's MD5 to the offset of its
+ /// The table mapping from function context to the offset of its
/// FunctionSample towards file start.
/// At most one of FuncOffsetTable and FuncOffsetList is populated.
- DenseMap<hash_code, uint64_t> FuncOffsetTable;
+ DenseMap<SampleContext, uint64_t> FuncOffsetTable;
/// The list version of FuncOffsetTable. This is used if every entry is
/// being accessed.
diff --git a/llvm/lib/ProfileData/ProfileSummaryBuilder.cpp b/llvm/lib/ProfileData/ProfileSummaryBuilder.cpp
index 3a45113b0a2eae..8e07478fb083b7 100644
--- a/llvm/lib/ProfileData/ProfileSummaryBuilder.cpp
+++ b/llvm/lib/ProfileData/ProfileSummaryBuilder.cpp
@@ -204,7 +204,9 @@ SampleProfileSummaryBuilder::computeSummaryForProfiles(
// profiles before computing profile summary.
if (UseContextLessSummary || (sampleprof::FunctionSamples::ProfileIsCS &&
!UseContextLessSummary.getNumOccurrences())) {
- ProfileConverter::flattenProfile(Profiles, ContextLessProfiles, true);
+ for (const auto &I : Profiles) {
+ ContextLessProfiles[I.second.getName()].merge(I.second);
+ }
ProfilesToUse = &ContextLessProfiles;
}
diff --git a/llvm/lib/ProfileData/SampleProf.cpp b/llvm/lib/ProfileData/SampleProf.cpp
index b14dc01be23624..fdae8a011e7120 100644
--- a/llvm/lib/ProfileData/SampleProf.cpp
+++ b/llvm/lib/ProfileData/SampleProf.cpp
@@ -202,12 +202,13 @@ void sampleprof::sortFuncProfiles(
const SampleProfileMap &ProfileMap,
std::vector<NameFunctionSamples> &SortedProfiles) {
for (const auto &I : ProfileMap) {
- SortedProfiles.push_back(std::make_pair(I.first, &I.second));
+ assert(I.first == I.second.getContext() && "Inconsistent profile map");
+ SortedProfiles.push_back(std::make_pair(I.second.getContext(), &I.second));
}
llvm::stable_sort(SortedProfiles, [](const NameFunctionSamples &A,
const NameFunctionSamples &B) {
if (A.second->getTotalSamples() == B.second->getTotalSamples())
- return A.second->getContext() < B.second->getContext();
+ return A.first < B.first;
return A.second->getTotalSamples() > B.second->getTotalSamples();
});
}
@@ -356,13 +357,13 @@ void SampleContextTrimmer::trimAndMergeColdContextProfiles(
// Filter the cold profiles from ProfileMap and move them into a tmp
// container
- std::vector<std::pair<hash_code, const FunctionSamples *>> ColdProfiles;
+ std::vector<std::pair<SampleContext, const FunctionSamples *>> ColdProfiles;
for (const auto &I : ProfileMap) {
- const SampleContext &Context = I.second.getContext();
+ const SampleContext &Context = I.first;
const FunctionSamples &FunctionProfile = I.second;
if (FunctionProfile.getTotalSamples() < ColdCountThreshold &&
(!TrimBaseProfileOnly || Context.isBaseContext()))
- ColdProfiles.emplace_back(I.first, &I.second);
+ ColdProfiles.emplace_back(Context, &I.second);
}
// Remove the cold profile from ProfileMap and merge them into
@@ -373,8 +374,8 @@ void SampleContextTrimmer::trimAndMergeColdContextProfiles(
auto MergedContext = I.second->getContext().getContextFrames();
if (ColdContextFrameLength < MergedContext.size())
MergedContext = MergedContext.take_back(ColdContextFrameLength);
- // Need to set MergedProfile's context here otherwise it will be lost.
- FunctionSamples &MergedProfile = MergedProfileMap.Create(MergedContext);
+ auto Ret = MergedProfileMap.emplace(MergedContext, FunctionSamples());
+ FunctionSamples &MergedProfile = Ret.first->second;
MergedProfile.merge(*I.second);
}
ProfileMap.erase(I.first);
@@ -384,17 +385,57 @@ void SampleContextTrimmer::trimAndMergeColdContextProfiles(
for (const auto &I : MergedProfileMap) {
// Filter the cold merged profile
if (TrimColdContext && I.second.getTotalSamples() < ColdCountThreshold &&
- ProfileMap.find(I.second.getContext()) == ProfileMap.end())
+ ProfileMap.find(I.first) == ProfileMap.end())
continue;
// Merge the profile if the original profile exists, otherwise just insert
- // as a new profile. If inserted as a new profile from MergedProfileMap, it
- // already has the right context.
- auto Ret = ProfileMap.emplace(I.second.getContext(), FunctionSamples());
+ // as a new profile
+ auto Ret = ProfileMap.emplace(I.first, FunctionSamples());
+ if (Ret.second) {
+ SampleContext FContext(Ret.first->first, RawContext);
+ FunctionSamples &FProfile = Ret.first->second;
+ FProfile.setContext(FContext);
+ }
FunctionSamples &OrigProfile = Ret.first->second;
OrigProfile.merge(I.second);
}
}
+void SampleContextTrimmer::canonicalizeContextProfiles() {
+ std::vector<SampleContext> ProfilesToBeRemoved;
+ SampleProfileMap ProfilesToBeAdded;
+ for (auto &I : ProfileMap) {
+ FunctionSamples &FProfile = I.second;
+ SampleContext &Context = FProfile.getContext();
+ if (I.first == Context)
+ continue;
+
+ // Use the context string from FunctionSamples to update the keys of
+ // ProfileMap. They can get out of sync after context profile promotion
+ // through pre-inliner.
+ // Duplicate the function profile for later insertion to avoid a conflict
+ // caused by a context both to be add and to be removed. This could happen
+ // when a context is promoted to another context which is also promoted to
+ // the third context. For example, given an original context A @ B @ C that
+ // is promoted to B @ C and the original context B @ C which is promoted to
+ // just C, adding B @ C to the profile map while removing same context (but
+ // with
diff erent profiles) from the map can cause a conflict if they are
+ // not handled in a right order. This can be solved by just caching the
+ // profiles to be added.
+ auto Ret = ProfilesToBeAdded.emplace(Context, FProfile);
+ (void)Ret;
+ assert(Ret.second && "Context conflict during canonicalization");
+ ProfilesToBeRemoved.push_back(I.first);
+ }
+
+ for (auto &I : ProfilesToBeRemoved) {
+ ProfileMap.erase(I);
+ }
+
+ for (auto &I : ProfilesToBeAdded) {
+ ProfileMap.emplace(I.first, I.second);
+ }
+}
+
std::error_code ProfileSymbolList::write(raw_ostream &OS) {
// Sort the symbols before output. If doing compression.
// It will make the compression much more effective.
@@ -468,7 +509,6 @@ void ProfileConverter::convertCSProfiles(ProfileConverter::FrameNode &Node) {
if (!ChildProfile)
continue;
SampleContext OrigChildContext = ChildProfile->getContext();
- hash_code OrigChildContextHash = OrigChildContext.getHashCode();
// Reset the child context to be contextless.
ChildProfile->getContext().setName(OrigChildContext.getName());
if (NodeProfile) {
@@ -484,7 +524,6 @@ void ProfileConverter::convertCSProfiles(ProfileConverter::FrameNode &Node) {
NodeProfile->removeTotalSamples(Count);
}
- hash_code NewChildProfileHash(0);
// Separate child profile to be a standalone profile, if the current parent
// profile doesn't exist. This is a duplicating operation when the child
// profile is already incorporated into the parent which is still useful and
@@ -493,20 +532,15 @@ void ProfileConverter::convertCSProfiles(ProfileConverter::FrameNode &Node) {
// profile in the prelink phase for to-be-fully-inlined functions.
if (!NodeProfile) {
ProfileMap[ChildProfile->getContext()].merge(*ChildProfile);
- NewChildProfileHash = ChildProfile->getContext().getHashCode();
} else if (GenerateMergedBaseProfiles) {
ProfileMap[ChildProfile->getContext()].merge(*ChildProfile);
- NewChildProfileHash = ChildProfile->getContext().getHashCode();
auto &SamplesMap = NodeProfile->functionSamplesAt(ChildNode.CallSiteLoc);
SamplesMap[ChildProfile->getName().str()].getContext().setAttribute(
ContextDuplicatedIntoBase);
}
- // Remove the original child profile. Check if MD5 of new child profile
- // collides with old profile, in this case the [] operator already
- // overwritten it without the need of erase.
- if (NewChildProfileHash != OrigChildContextHash)
- ProfileMap.erase(OrigChildContextHash);
+ // Remove the original child profile.
+ ProfileMap.erase(OrigChildContext);
}
}
diff --git a/llvm/lib/ProfileData/SampleProfReader.cpp b/llvm/lib/ProfileData/SampleProfReader.cpp
index 0b470824062b66..fbdd9a307321bf 100644
--- a/llvm/lib/ProfileData/SampleProfReader.cpp
+++ b/llvm/lib/ProfileData/SampleProfReader.cpp
@@ -61,9 +61,9 @@ static cl::opt<bool> ProfileIsFSDisciminator(
///
/// \param FContext Name + context of the function to print.
/// \param OS Stream to emit the output to.
-void SampleProfileReader::dumpFunctionProfile(const FunctionSamples &FS,
+void SampleProfileReader::dumpFunctionProfile(SampleContext FContext,
raw_ostream &OS) {
- OS << "Function: " << FS.getContext().toString() << ": " << FS;
+ OS << "Function: " << FContext.toString() << ": " << Profiles[FContext];
}
/// Dump all the function profiles found on stream \p OS.
@@ -71,7 +71,7 @@ void SampleProfileReader::dump(raw_ostream &OS) {
std::vector<NameFunctionSamples> V;
sortFuncProfiles(Profiles, V);
for (const auto &I : V)
- dumpFunctionProfile(*I.second, OS);
+ dumpFunctionProfile(I.first, OS);
}
static void dumpFunctionProfileJson(const FunctionSamples &S,
@@ -355,7 +355,9 @@ std::error_code SampleProfileReaderText::readImpl() {
SampleContext FContext(FName, CSNameTable);
if (FContext.hasContext())
++CSProfileCount;
- FunctionSamples &FProfile = Profiles.Create(FContext);
+ Profiles[FContext] = FunctionSamples();
+ FunctionSamples &FProfile = Profiles[FContext];
+ FProfile.setContext(FContext);
MergeResult(Result, FProfile.addTotalSamples(NumSamples));
MergeResult(Result, FProfile.addHeadSamples(NumHeadSamples));
InlineStack.clear();
@@ -523,8 +525,7 @@ inline ErrorOr<size_t> SampleProfileReaderBinary::readStringIndex(T &Table) {
return *Idx;
}
-ErrorOr<StringRef>
-SampleProfileReaderBinary::readStringFromTable(size_t *RetIdx) {
+ErrorOr<StringRef> SampleProfileReaderBinary::readStringFromTable() {
auto Idx = readStringIndex(NameTable);
if (std::error_code EC = Idx.getError())
return EC;
@@ -542,52 +543,30 @@ SampleProfileReaderBinary::readStringFromTable(size_t *RetIdx) {
MD5NameMemStart + (*Idx) * sizeof(uint64_t));
SR = MD5StringBuf.emplace_back(std::to_string(FID));
}
- if (RetIdx)
- *RetIdx = *Idx;
return SR;
}
-ErrorOr<SampleContextFrames>
-SampleProfileReaderBinary::readContextFromTable(size_t *RetIdx) {
+ErrorOr<SampleContextFrames> SampleProfileReaderBinary::readContextFromTable() {
auto ContextIdx = readNumber<size_t>();
if (std::error_code EC = ContextIdx.getError())
return EC;
if (*ContextIdx >= CSNameTable.size())
return sampleprof_error::truncated_name_table;
- if (RetIdx)
- *RetIdx = *ContextIdx;
return CSNameTable[*ContextIdx];
}
-ErrorOr<std::pair<SampleContext, hash_code>>
-SampleProfileReaderBinary::readSampleContextFromTable() {
- SampleContext Context;
- size_t Idx;
+ErrorOr<SampleContext> SampleProfileReaderBinary::readSampleContextFromTable() {
if (ProfileIsCS) {
- auto FContext(readContextFromTable(&Idx));
+ auto FContext(readContextFromTable());
if (std::error_code EC = FContext.getError())
return EC;
- Context = SampleContext(*FContext);
+ return SampleContext(*FContext);
} else {
- auto FName(readStringFromTable(&Idx));
+ auto FName(readStringFromTable());
if (std::error_code EC = FName.getError())
return EC;
- Context = SampleContext(*FName);
- }
- // Since MD5SampleContextStart may point to the profile's file data, need to
- // make sure it is reading the same value on big endian CPU.
- hash_code Hash =
- support::endian::read<hash_code, support::little, support::unaligned>(
- MD5SampleContextStart + Idx);
- // Lazy computing of hash value, write back to the table to cache it. Only
- // compute the context's hash value if it is being referenced for the first
- // time.
- if (Hash == hash_code(0)) {
- assert(MD5SampleContextStart == MD5SampleContextTable.data());
- Hash = Context.getHashCode();
- support::endian::write(&MD5SampleContextTable[Idx], Hash, support::little);
+ return SampleContext(*FName);
}
- return std::make_pair(Context, Hash);
}
std::error_code
@@ -680,18 +659,16 @@ SampleProfileReaderBinary::readFuncProfile(const uint8_t *Start) {
if (std::error_code EC = NumHeadSamples.getError())
return EC;
- auto FContextHash(readSampleContextFromTable());
- if (std::error_code EC = FContextHash.getError())
+ ErrorOr<SampleContext> FContext(readSampleContextFromTable());
+ if (std::error_code EC = FContext.getError())
return EC;
- auto &[FContext, Hash] = *FContextHash;
- // Use the cached hash value for insertion instead of recalculating it.
- auto Res = Profiles.try_emplace(Hash, FContext, FunctionSamples());
- FunctionSamples &FProfile = Res.first->second;
- FProfile.setContext(FContext);
+ Profiles[*FContext] = FunctionSamples();
+ FunctionSamples &FProfile = Profiles[*FContext];
+ FProfile.setContext(*FContext);
FProfile.addHeadSamples(*NumHeadSamples);
- if (FContext.hasContext())
+ if (FContext->hasContext())
CSProfileCount++;
if (std::error_code EC = readProfile(FProfile))
@@ -839,21 +816,18 @@ std::error_code SampleProfileReaderExtBinaryBase::readFuncOffsetTable() {
FuncOffsetTable.reserve(*Size);
for (uint64_t I = 0; I < *Size; ++I) {
- auto FContextHash(readSampleContextFromTable());
- if (std::error_code EC = FContextHash.getError())
+ auto FContext(readSampleContextFromTable());
+ if (std::error_code EC = FContext.getError())
return EC;
- auto &[FContext, Hash] = *FContextHash;
auto Offset = readNumber<uint64_t>();
if (std::error_code EC = Offset.getError())
return EC;
if (UseFuncOffsetList)
- FuncOffsetList.emplace_back(FContext, *Offset);
+ FuncOffsetList.emplace_back(*FContext, *Offset);
else
- // Because Porfiles replace existing value with new value if collision
- // happens, we also use the latest offset so that they are consistent.
- FuncOffsetTable[Hash] = *Offset;
+ FuncOffsetTable[*FContext] = *Offset;
}
return sampleprof_error::success;
@@ -926,8 +900,8 @@ std::error_code SampleProfileReaderExtBinaryBase::readFuncProfiles() {
} else if (useMD5()) {
assert(!useFuncOffsetList());
for (auto Name : FuncsToUse) {
- auto GUID = MD5Hash(Name);
- auto iter = FuncOffsetTable.find(GUID);
+ auto GUID = std::to_string(MD5Hash(Name));
+ auto iter = FuncOffsetTable.find(StringRef(GUID));
if (iter == FuncOffsetTable.end())
continue;
const uint8_t *FuncProfileAddr = Start + iter->second;
@@ -948,7 +922,7 @@ std::error_code SampleProfileReaderExtBinaryBase::readFuncProfiles() {
} else {
assert(!useFuncOffsetList());
for (auto Name : FuncsToUse) {
- auto iter = FuncOffsetTable.find(MD5Hash(Name));
+ auto iter = FuncOffsetTable.find(Name);
if (iter == FuncOffsetTable.end())
continue;
const uint8_t *FuncProfileAddr = Start + iter->second;
@@ -1076,30 +1050,17 @@ std::error_code SampleProfileReaderBinary::readNameTable() {
NameTable.clear();
NameTable.reserve(*Size);
- if (!ProfileIsCS) {
- MD5SampleContextTable.clear();
- if (UseMD5)
- MD5SampleContextTable.reserve(*Size);
- else
- // If we are using strings, delay MD5 computation since only a portion of
- // names are used by top level functions. Use 0 to indicate MD5 value is
- // to be calculated as no known string has a MD5 value of 0.
- MD5SampleContextTable.resize(*Size);
- }
for (size_t I = 0; I < *Size; ++I) {
auto Name(readString());
if (std::error_code EC = Name.getError())
return EC;
if (UseMD5) {
- uint64_t FID = hashFuncName(*Name);
- if (!ProfileIsCS)
- MD5SampleContextTable.emplace_back(FID);
+ uint64_t FID = MD5Hash(*Name);
NameTable.emplace_back(MD5StringBuf.emplace_back(std::to_string(FID)));
} else
NameTable.push_back(*Name);
}
- if (!ProfileIsCS)
- MD5SampleContextStart = MD5SampleContextTable.data();
+
return sampleprof_error::success;
}
@@ -1127,8 +1088,6 @@ SampleProfileReaderExtBinaryBase::readNameTableSec(bool IsMD5,
NameTable.clear();
NameTable.resize(*Size);
MD5NameMemStart = Data;
- if (!ProfileIsCS)
- MD5SampleContextStart = reinterpret_cast<const hash_code *>(Data);
Data = Data + (*Size) * sizeof(uint64_t);
return sampleprof_error::success;
}
@@ -1142,19 +1101,12 @@ SampleProfileReaderExtBinaryBase::readNameTableSec(bool IsMD5,
MD5StringBuf.reserve(MD5StringBuf.size() + *Size);
NameTable.clear();
NameTable.reserve(*Size);
- if (!ProfileIsCS)
- MD5SampleContextTable.resize(*Size);
for (size_t I = 0; I < *Size; ++I) {
auto FID = readNumber<uint64_t>();
if (std::error_code EC = FID.getError())
return EC;
- if (!ProfileIsCS)
- support::endian::write(&MD5SampleContextTable[I], *FID,
- support::little);
NameTable.emplace_back(MD5StringBuf.emplace_back(std::to_string(*FID)));
}
- if (!ProfileIsCS)
- MD5SampleContextStart = MD5SampleContextTable.data();
return sampleprof_error::success;
}
@@ -1172,14 +1124,6 @@ std::error_code SampleProfileReaderExtBinaryBase::readCSNameTableSec() {
CSNameTable.clear();
CSNameTable.reserve(*Size);
- if (ProfileIsCS) {
- // Delay MD5 computation of CS context until they are needed. Use 0 to
- // indicate MD5 value is to be calculated as no known string has a MD5
- // value of 0.
- MD5SampleContextTable.clear();
- MD5SampleContextTable.resize(*Size);
- MD5SampleContextStart = MD5SampleContextTable.data();
- }
for (size_t I = 0; I < *Size; ++I) {
CSNameTable.emplace_back(SampleContextFrameVector());
auto ContextSize = readNumber<uint32_t>();
@@ -1243,17 +1187,16 @@ SampleProfileReaderExtBinaryBase::readFuncMetadata(bool ProfileHasAttribute,
if (std::error_code EC = Discriminator.getError())
return EC;
- auto FContextHash(readSampleContextFromTable());
- if (std::error_code EC = FContextHash.getError())
+ auto FContext(readSampleContextFromTable());
+ if (std::error_code EC = FContext.getError())
return EC;
- auto &[FContext, Hash] = *FContextHash;
FunctionSamples *CalleeProfile = nullptr;
if (FProfile) {
CalleeProfile = const_cast<FunctionSamples *>(
&FProfile->functionSamplesAt(LineLocation(
*LineOffset,
- *Discriminator))[std::string(FContext.getName())]);
+ *Discriminator))[std::string(FContext.get().getName())]);
}
if (std::error_code EC =
readFuncMetadata(ProfileHasAttribute, CalleeProfile))
@@ -1268,12 +1211,11 @@ SampleProfileReaderExtBinaryBase::readFuncMetadata(bool ProfileHasAttribute,
std::error_code
SampleProfileReaderExtBinaryBase::readFuncMetadata(bool ProfileHasAttribute) {
while (Data < End) {
- auto FContextHash(readSampleContextFromTable());
- if (std::error_code EC = FContextHash.getError())
+ auto FContext(readSampleContextFromTable());
+ if (std::error_code EC = FContext.getError())
return EC;
- auto &[FContext, Hash] = *FContextHash;
FunctionSamples *FProfile = nullptr;
- auto It = Profiles.find(FContext);
+ auto It = Profiles.find(*FContext);
if (It != Profiles.end())
FProfile = &It->second;
diff --git a/llvm/lib/ProfileData/SampleProfWriter.cpp b/llvm/lib/ProfileData/SampleProfWriter.cpp
index f418dbe7105709..0873093ad426c5 100644
--- a/llvm/lib/ProfileData/SampleProfWriter.cpp
+++ b/llvm/lib/ProfileData/SampleProfWriter.cpp
@@ -357,13 +357,14 @@ std::error_code SampleProfileWriterExtBinaryBase::writeNameTable() {
encodeULEB128(NameTable.size(), OS);
support::endian::Writer Writer(OS, support::little);
for (auto N : V)
- Writer.write(hashFuncName(N));
+ Writer.write(MD5Hash(N));
return sampleprof_error::success;
}
std::error_code SampleProfileWriterExtBinaryBase::writeNameTableSection(
const SampleProfileMap &ProfileMap) {
for (const auto &I : ProfileMap) {
+ assert(I.first == I.second.getContext() && "Inconsistent profile map");
addContext(I.second.getContext());
addNames(I.second);
}
@@ -725,7 +726,8 @@ SampleProfileWriterBinary::writeHeader(const SampleProfileMap &ProfileMap) {
// Generate the name table for all the functions referenced in the profile.
for (const auto &I : ProfileMap) {
- addContext(I.second.getContext());
+ assert(I.first == I.second.getContext() && "Inconsistent profile map");
+ addContext(I.first);
addNames(I.second);
}
diff --git a/llvm/lib/Transforms/IPO/SampleContextTracker.cpp b/llvm/lib/Transforms/IPO/SampleContextTracker.cpp
index 79bee8362ef8a9..3ddf5fe20edb27 100644
--- a/llvm/lib/Transforms/IPO/SampleContextTracker.cpp
+++ b/llvm/lib/Transforms/IPO/SampleContextTracker.cpp
@@ -201,7 +201,7 @@ SampleContextTracker::SampleContextTracker(
: GUIDToFuncNameMap(GUIDToFuncNameMap) {
for (auto &FuncSample : Profiles) {
FunctionSamples *FSamples = &FuncSample.second;
- SampleContext Context = FuncSample.second.getContext();
+ SampleContext Context = FuncSample.first;
LLVM_DEBUG(dbgs() << "Tracking Context for function: " << Context.toString()
<< "\n");
ContextTrieNode *NewNode = getOrCreateContextPath(Context, true);
@@ -638,7 +638,7 @@ void SampleContextTracker::createContextLessProfileMap(
FunctionSamples *FProfile = Node->getFunctionSamples();
// Profile's context can be empty, use ContextNode's func name.
if (FProfile)
- ContextLessProfiles.Create(Node->getFuncName()).merge(*FProfile);
+ ContextLessProfiles[Node->getFuncName()].merge(*FProfile);
}
}
} // namespace llvm
diff --git a/llvm/test/tools/llvm-profdata/Inputs/sample-nametable-after-samples.profdata b/llvm/test/tools/llvm-profdata/Inputs/sample-nametable-after-samples.profdata
index 76f19bbc7e513a..699972870dae43 100644
Binary files a/llvm/test/tools/llvm-profdata/Inputs/sample-nametable-after-samples.profdata and b/llvm/test/tools/llvm-profdata/Inputs/sample-nametable-after-samples.profdata
diff er
diff --git a/llvm/test/tools/llvm-profdata/sample-nametable.test b/llvm/test/tools/llvm-profdata/sample-nametable.test
index d65b9737a77e31..1d59b9fa43725d 100644
--- a/llvm/test/tools/llvm-profdata/sample-nametable.test
+++ b/llvm/test/tools/llvm-profdata/sample-nametable.test
@@ -8,5 +8,5 @@ RUN: not llvm-profdata show --sample %p/Inputs/sample-nametable-empty-string.pro
3- The data of the name table is placed after the data of the profiles. The reader should handle it correctly.
RUN: llvm-profdata merge --sample --text %p/Inputs/sample-nametable-after-samples.profdata | FileCheck %s
-CHECK: 18446744073709551613:2:9
+CHECK: 18446744073709551615:2:9
diff --git a/llvm/tools/llvm-profdata/llvm-profdata.cpp b/llvm/tools/llvm-profdata/llvm-profdata.cpp
index a11fc5753497f7..da10ddcc58c642 100644
--- a/llvm/tools/llvm-profdata/llvm-profdata.cpp
+++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp
@@ -594,7 +594,7 @@ adjustInstrProfile(std::unique_ptr<WriterContext> &WC,
auto checkSampleProfileHasFUnique = [&Reader]() {
for (const auto &PD : Reader->getProfiles()) {
- auto &FContext = PD.second.getContext();
+ auto &FContext = PD.first;
if (FContext.toString().find(FunctionSamples::UniqSuffix) !=
std::string::npos) {
return true;
@@ -2833,8 +2833,7 @@ static int showSampleProfile(const std::string &Filename, bool ShowCounts,
"be printed");
// TODO: parse context string to support filtering by contexts.
- FunctionSamples *FS = Reader->getSamplesFor(StringRef(ShowFunction));
- Reader->dumpFunctionProfile(FS ? *FS : FunctionSamples(), OS);
+ Reader->dumpFunctionProfile(StringRef(ShowFunction), OS);
}
if (ShowProfileSymbolList) {
diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
index d307ab46567629..97bc8d59b6cd7b 100644
--- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp
+++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
@@ -449,7 +449,7 @@ bool ProfileGeneratorBase::collectFunctionsFromRawProfile(
bool ProfileGenerator::collectFunctionsFromLLVMProfile(
std::unordered_set<const BinaryFunction *> &ProfiledFunctions) {
for (const auto &FS : ProfileMap) {
- if (auto *Func = Binary->getBinaryFunction(FS.second.getName()))
+ if (auto *Func = Binary->getBinaryFunction(FS.first.getName()))
ProfiledFunctions.insert(Func);
}
return true;
@@ -468,7 +468,12 @@ bool CSProfileGenerator::collectFunctionsFromLLVMProfile(
FunctionSamples &
ProfileGenerator::getTopLevelFunctionProfile(StringRef FuncName) {
SampleContext Context(FuncName);
- return ProfileMap.Create(Context);
+ auto Ret = ProfileMap.emplace(Context, FunctionSamples());
+ if (Ret.second) {
+ FunctionSamples &FProfile = Ret.first->second;
+ FProfile.setContext(Context);
+ }
+ return Ret.first->second;
}
void ProfileGenerator::generateProfile() {
@@ -500,14 +505,14 @@ void ProfileGenerator::trimColdProfiles(const SampleProfileMap &Profiles,
return;
// Move cold profiles into a tmp container.
- std::vector<hash_code> ColdProfileHashes;
+ std::vector<SampleContext> ColdProfiles;
for (const auto &I : ProfileMap) {
if (I.second.getTotalSamples() < ColdCntThreshold)
- ColdProfileHashes.emplace_back(I.first);
+ ColdProfiles.emplace_back(I.first);
}
// Remove the cold profile from ProfileMap.
- for (const auto &I : ColdProfileHashes)
+ for (const auto &I : ColdProfiles)
ProfileMap.erase(I);
}
@@ -1013,7 +1018,9 @@ void CSProfileGenerator::postProcessProfiles() {
// Merge function samples of CS profile to calculate profile density.
sampleprof::SampleProfileMap ContextLessProfiles;
- ProfileConverter::flattenProfile(ProfileMap, ContextLessProfiles, true);
+ for (const auto &I : ProfileMap) {
+ ContextLessProfiles[I.second.getName()].merge(I.second);
+ }
calculateAndShowDensity(ContextLessProfiles);
if (GenCSNestedProfile) {
diff --git a/llvm/unittests/tools/llvm-profdata/CMakeLists.txt b/llvm/unittests/tools/llvm-profdata/CMakeLists.txt
index c53baf69bbc0ef..ad91ce36bcb5be 100644
--- a/llvm/unittests/tools/llvm-profdata/CMakeLists.txt
+++ b/llvm/unittests/tools/llvm-profdata/CMakeLists.txt
@@ -6,7 +6,6 @@ set(LLVM_LINK_COMPONENTS
add_llvm_unittest(LLVMProfdataTests
OutputSizeLimitTest.cpp
- MD5CollisionTest.cpp
)
target_link_libraries(LLVMProfdataTests PRIVATE LLVMTestingSupport)
diff --git a/llvm/unittests/tools/llvm-profdata/MD5CollisionTest.cpp b/llvm/unittests/tools/llvm-profdata/MD5CollisionTest.cpp
deleted file mode 100644
index 4ae57cd711c179..00000000000000
--- a/llvm/unittests/tools/llvm-profdata/MD5CollisionTest.cpp
+++ /dev/null
@@ -1,166 +0,0 @@
-//===- llvm/unittests/tools/llvm-profdata/MD5CollisionTest.cpp ------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-/// Test whether the MD5-key SampleProfileMap can handle collision correctly.
-/// Probability of collision is rare but not negligible since we only use the
-/// lower 64 bits of the MD5 value. A unit test is required because the function
-/// names are not printable ASCII characters.
-
-#include "llvm/ProfileData/SampleProfReader.h"
-#include "llvm/Support/VirtualFileSystem.h"
-#include "llvm/Testing/Support/Error.h"
-#include "gtest/gtest.h"
-
-/// According to https://en.wikipedia.org/wiki/MD5#Preimage_vulnerability, the
-/// MD5 of the two strings are 79054025255fb1a26e4bc422aef54eb4.
-
-// First 8 bytes of the MD5.
-const llvm::hash_code ExpectedHash = 0xa2b15f2525400579;
-
-// clang-format off
-const uint8_t ProfileData[] = {
- 0x84, 0xe4, 0xd0, 0xb1, 0xf4, 0xc9, 0x94, 0xa8,
- 0x53, 0x67, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x7D, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x03, 0x01, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x80, 0x01, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x90, 0x01, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00,
-
- /// Name Table
- 0x02,
- /// String1
- 0xd1, 0x31, 0xdd, 0x02, 0xc5, 0xe6, 0xee, 0xc4,
- 0x69, 0x3d, 0x9a, 0x06, 0x98, 0xaf, 0xf9, 0x5c,
- 0x2f, 0xca, 0xb5, 0x87, 0x12, 0x46, 0x7e, 0xab,
- 0x40, 0x04, 0x58, 0x3e, 0xb8, 0xfb, 0x7f, 0x89,
- 0x55, 0xad, 0x34, 0x06, 0x09, 0xf4, 0xb3, 0x02,
- 0x83, 0xe4, 0x88, 0x83, 0x25, 0x71, 0x41, 0x5a,
- 0x08, 0x51, 0x25, 0xe8, 0xf7, 0xcd, 0xc9, 0x9f,
- 0xd9, 0x1d, 0xbd, 0xf2, 0x80, 0x37, 0x3c, 0x5b,
- 0xd8, 0x82, 0x3e, 0x31, 0x56, 0x34, 0x8f, 0x5b,
- 0xae, 0x6d, 0xac, 0xd4, 0x36, 0xc9, 0x19, 0xc6,
- 0xdd, 0x53, 0xe2, 0xb4, 0x87, 0xda, 0x03, 0xfd,
- 0x02, 0x39, 0x63, 0x06, 0xd2, 0x48, 0xcd, 0xa0,
- 0xe9, 0x9f, 0x33, 0x42, 0x0f, 0x57, 0x7e, 0xe8,
- 0xce, 0x54, 0xb6, 0x70, 0x80, 0xa8, 0x0d, 0x1e,
- 0xc6, 0x98, 0x21, 0xbc, 0xb6, 0xa8, 0x83, 0x93,
- 0x96, 0xf9, 0x65, 0x2b, 0x6f, 0xf7, 0x2a, 0x70, 0x00,
- /// String2
- 0xd1, 0x31, 0xdd, 0x02, 0xc5, 0xe6, 0xee, 0xc4,
- 0x69, 0x3d, 0x9a, 0x06, 0x98, 0xaf, 0xf9, 0x5c,
- 0x2f, 0xca, 0xb5, 0x07, 0x12, 0x46, 0x7e, 0xab,
- 0x40, 0x04, 0x58, 0x3e, 0xb8, 0xfb, 0x7f, 0x89,
- 0x55, 0xad, 0x34, 0x06, 0x09, 0xf4, 0xb3, 0x02,
- 0x83, 0xe4, 0x88, 0x83, 0x25, 0xf1, 0x41, 0x5a,
- 0x08, 0x51, 0x25, 0xe8, 0xf7, 0xcd, 0xc9, 0x9f,
- 0xd9, 0x1d, 0xbd, 0x72, 0x80, 0x37, 0x3c, 0x5b,
- 0xd8, 0x82, 0x3e, 0x31, 0x56, 0x34, 0x8f, 0x5b,
- 0xae, 0x6d, 0xac, 0xd4, 0x36, 0xc9, 0x19, 0xc6,
- 0xdd, 0x53, 0xe2, 0x34, 0x87, 0xda, 0x03, 0xfd,
- 0x02, 0x39, 0x63, 0x06, 0xd2, 0x48, 0xcd, 0xa0,
- 0xe9, 0x9f, 0x33, 0x42, 0x0f, 0x57, 0x7e, 0xe8,
- 0xce, 0x54, 0xb6, 0x70, 0x80, 0x28, 0x0d, 0x1e,
- 0xc6, 0x98, 0x21, 0xbc, 0xb6, 0xa8, 0x83, 0x93,
- 0x96, 0xf9, 0x65, 0xab, 0x6f, 0xf7, 0x2a, 0x70, 0x00,
-
- /// FuncOffsetTable
- 0x02, 0x00, 0x00, 0x01, 0x17, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-
- /// Samples
- /// String1:10:1
- /// 1: 5
- /// 2.3: 6
- /// 4: String2:100
- /// 1: 100
- /// String2:7:3
- /// 9: 0
- 0x01, 0x00, 0x0a, 0x02, 0x01, 0x00, 0x05, 0x00,
- 0x02, 0x03, 0x06, 0x00, 0x01, 0x04, 0x00, 0x01,
- 0x64, 0x01, 0x01, 0x00, 0x64, 0x00, 0x00,
-
- 0x03, 0x01, 0x07, 0x01, 0x09, 0x00, 0x00, 0x00,
- 0x00};
-// clang-format on
-
-using namespace llvm;
-using namespace llvm::sampleprof;
-
-TEST(MD5CollisionTest, TestCollision) {
- auto InputBuffer = MemoryBuffer::getMemBuffer(
- StringRef(reinterpret_cast<const char *>(ProfileData),
- sizeof(ProfileData)),
- "", false);
- LLVMContext Context;
- auto FileSystem = vfs::getRealFileSystem();
- auto Result = SampleProfileReader::create(InputBuffer, Context, *FileSystem);
- ASSERT_TRUE(Result);
- SampleProfileReader *Reader = Result->get();
- ASSERT_FALSE(Reader->read());
-
- std::vector<StringRef> &NameTable = *Reader->getNameTable();
- ASSERT_EQ(NameTable.size(), 2U);
- StringRef S1 = NameTable[0];
- StringRef S2 = NameTable[1];
- ASSERT_NE(S1, S2);
- ASSERT_EQ(hash_code(MD5Hash(S1)), ExpectedHash);
- ASSERT_EQ(hash_code(MD5Hash(S2)), ExpectedHash);
-
- // S2's MD5 value collides with S1, S1 is expected to be dropped when S2 is
- // inserted, as if S1 never existed.
-
- FunctionSamples ExpectedFS;
- ExpectedFS.setName(S2);
- ExpectedFS.setHeadSamples(3);
- ExpectedFS.setTotalSamples(7);
- ExpectedFS.addBodySamples(9, 0, 0);
-
- SampleProfileMap &Profiles = Reader->getProfiles();
- EXPECT_EQ(Profiles.size(), 1U);
- if (Profiles.size()) {
- auto &[Hash, FS] = *Profiles.begin();
- EXPECT_EQ(Hash, ExpectedHash);
- EXPECT_EQ(FS, ExpectedFS);
- }
-
- // Inserting S2 again should fail, returning the existing sample unchanged.
- auto [It1, Inserted1] = Profiles.try_emplace(S2, FunctionSamples());
- EXPECT_FALSE(Inserted1);
- EXPECT_EQ(Profiles.size(), 1U);
- if (Profiles.size()) {
- auto &[Hash, FS] = *It1;
- EXPECT_EQ(Hash, ExpectedHash);
- EXPECT_EQ(FS, ExpectedFS);
- }
-
- // Inserting S1 should success as if S2 never existed, and S2 is erased.
- FunctionSamples FS1;
- FS1.setName(S1);
- FS1.setHeadSamples(5);
- FS1.setTotalSamples(10);
- FS1.addBodySamples(1, 2, 5);
-
- auto [It2, Inserted2] = Profiles.try_emplace(S1, FS1);
- EXPECT_TRUE(Inserted2);
- EXPECT_EQ(Profiles.size(), 1U);
- if (Profiles.size()) {
- auto &[Hash, FS] = *It2;
- EXPECT_EQ(Hash, ExpectedHash);
- EXPECT_EQ(FS, FS1);
- }
-}
diff --git a/llvm/unittests/tools/llvm-profdata/OutputSizeLimitTest.cpp b/llvm/unittests/tools/llvm-profdata/OutputSizeLimitTest.cpp
index d21c378e8ce3b2..522a2c2dd33677 100644
--- a/llvm/unittests/tools/llvm-profdata/OutputSizeLimitTest.cpp
+++ b/llvm/unittests/tools/llvm-profdata/OutputSizeLimitTest.cpp
@@ -126,7 +126,7 @@ static ExpectedErrorOr<void *> RunTest(StringRef Input, size_t SizeLimit,
// For every sample in the new profile, confirm it is in the old profile and
// unchanged.
for (auto Sample : NewProfiles) {
- auto FindResult = OldProfiles.find(Sample.second.getContext());
+ auto FindResult = OldProfiles.find(Sample.first);
EXPECT_NE(FindResult, OldProfiles.end());
if (FindResult != OldProfiles.end()) {
EXPECT_EQ(Sample.second.getHeadSamples(),
More information about the llvm-commits
mailing list