[llvm] 874dcae - [Serialization] Use stable hash functions
via llvm-commits
llvm-commits at lists.llvm.org
Thu Jun 20 23:53:11 PDT 2024
Author: Fangrui Song
Date: 2024-06-20T23:53:07-07:00
New Revision: 874dcaea09171100de02fa6d5c0af0669aea900f
URL: https://github.com/llvm/llvm-project/commit/874dcaea09171100de02fa6d5c0af0669aea900f
DIFF: https://github.com/llvm/llvm-project/commit/874dcaea09171100de02fa6d5c0af0669aea900f.diff
LOG: [Serialization] Use stable hash functions
clangSerialization currently uses hash_combine/hash_value from
Hashing.h, which are not guaranteed to be deterministic.
Replace these uses with xxh3_64bits.
Pull Request: https://github.com/llvm/llvm-project/pull/96136
Added:
Modified:
clang/lib/AST/ODRHash.cpp
clang/lib/Serialization/ASTReader.cpp
clang/lib/Serialization/ASTWriter.cpp
llvm/include/llvm/ADT/FoldingSet.h
Removed:
################################################################################
diff --git a/clang/lib/AST/ODRHash.cpp b/clang/lib/AST/ODRHash.cpp
index 1249531eab09f..fbfe92318dc5e 100644
--- a/clang/lib/AST/ODRHash.cpp
+++ b/clang/lib/AST/ODRHash.cpp
@@ -251,7 +251,7 @@ unsigned ODRHash::CalculateHash() {
assert(I == Bools.rend());
Bools.clear();
- return ID.ComputeHash();
+ return ID.computeStableHash();
}
namespace {
diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp
index 552a3af546e75..c0b7db72fb603 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -1201,7 +1201,7 @@ unsigned DeclarationNameKey::getHash() const {
break;
}
- return ID.ComputeHash();
+ return ID.computeStableHash();
}
ModuleFile *
@@ -2033,7 +2033,10 @@ const FileEntry *HeaderFileInfoTrait::getFile(const internal_key_type &Key) {
}
unsigned HeaderFileInfoTrait::ComputeHash(internal_key_ref ikey) {
- return llvm::hash_combine(ikey.Size, ikey.ModTime);
+ uint8_t buf[sizeof(ikey.Size) + sizeof(ikey.ModTime)];
+ memcpy(buf, &ikey.Size, sizeof(ikey.Size));
+ memcpy(buf + sizeof(ikey.Size), &ikey.ModTime, sizeof(ikey.ModTime));
+ return llvm::xxh3_64bits(buf);
}
HeaderFileInfoTrait::internal_key_type
@@ -2640,8 +2643,7 @@ InputFile ASTReader::getInputFile(ModuleFile &F, unsigned ID, bool Complain) {
return OriginalChange;
}
- // FIXME: hash_value is not guaranteed to be stable!
- auto ContentHash = hash_value(MemBuffOrError.get()->getBuffer());
+ auto ContentHash = xxh3_64bits(MemBuffOrError.get()->getBuffer());
if (StoredContentHash == static_cast<uint64_t>(ContentHash))
return Change{Change::None};
diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp
index 0297e20e9116f..e00bacfd940b9 100644
--- a/clang/lib/Serialization/ASTWriter.cpp
+++ b/clang/lib/Serialization/ASTWriter.cpp
@@ -1782,7 +1782,7 @@ void ASTWriter::WriteInputFiles(SourceManager &SourceMgr,
.ValidateASTInputFilesContent) {
auto MemBuff = Cache->getBufferIfLoaded();
if (MemBuff)
- ContentHash = hash_value(MemBuff->getBuffer());
+ ContentHash = xxh3_64bits(MemBuff->getBuffer());
else
PP->Diag(SourceLocation(), diag::err_module_unable_to_hash_content)
<< Entry.File.getName();
@@ -1987,7 +1987,10 @@ namespace {
// The hash is based only on size/time of the file, so that the reader can
// match even when symlinking or excess path elements ("foo/../", "../")
// change the form of the name. However, complete path is still the key.
- return llvm::hash_combine(key.Size, key.ModTime);
+ uint8_t buf[sizeof(key.Size) + sizeof(key.ModTime)];
+ memcpy(buf, &key.Size, sizeof(key.Size));
+ memcpy(buf + sizeof(key.Size), &key.ModTime, sizeof(key.ModTime));
+ return llvm::xxh3_64bits(buf);
}
std::pair<unsigned, unsigned>
diff --git a/llvm/include/llvm/ADT/FoldingSet.h b/llvm/include/llvm/ADT/FoldingSet.h
index f82eabd5044b2..3c2eaade57e47 100644
--- a/llvm/include/llvm/ADT/FoldingSet.h
+++ b/llvm/include/llvm/ADT/FoldingSet.h
@@ -21,6 +21,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/iterator.h"
#include "llvm/Support/Allocator.h"
+#include "llvm/Support/xxhash.h"
#include <cassert>
#include <cstddef>
#include <cstdint>
@@ -294,12 +295,19 @@ class FoldingSetNodeIDRef {
FoldingSetNodeIDRef() = default;
FoldingSetNodeIDRef(const unsigned *D, size_t S) : Data(D), Size(S) {}
- /// ComputeHash - Compute a strong hash value for this FoldingSetNodeIDRef,
- /// used to lookup the node in the FoldingSetBase.
+ // Compute a strong hash value used to lookup the node in the FoldingSetBase.
+ // The hash value is not guaranteed to be deterministic across processes.
unsigned ComputeHash() const {
return static_cast<unsigned>(hash_combine_range(Data, Data + Size));
}
+ // Compute a hash value that is deterministic across processes, making it
+ // suitable for on-disk serialization.
+ unsigned computeStableHash() const {
+ return static_cast<unsigned>(xxh3_64bits(ArrayRef(
+ reinterpret_cast<const uint8_t *>(Data), sizeof(unsigned) * Size)));
+ }
+
bool operator==(FoldingSetNodeIDRef) const;
bool operator!=(FoldingSetNodeIDRef RHS) const { return !(*this == RHS); }
@@ -366,12 +374,19 @@ class FoldingSetNodeID {
/// object to be used to compute a new profile.
inline void clear() { Bits.clear(); }
- /// ComputeHash - Compute a strong hash value for this FoldingSetNodeID, used
- /// to lookup the node in the FoldingSetBase.
+ // Compute a strong hash value for this FoldingSetNodeID, used to lookup the
+ // node in the FoldingSetBase. The hash value is not guaranteed to be
+ // deterministic across processes.
unsigned ComputeHash() const {
return FoldingSetNodeIDRef(Bits.data(), Bits.size()).ComputeHash();
}
+ // Compute a hash value that is deterministic across processes, making it
+ // suitable for on-disk serialization.
+ unsigned computeStableHash() const {
+ return FoldingSetNodeIDRef(Bits.data(), Bits.size()).computeStableHash();
+ }
+
/// operator== - Used to compare two nodes to each other.
bool operator==(const FoldingSetNodeID &RHS) const;
bool operator==(const FoldingSetNodeIDRef RHS) const;
More information about the llvm-commits mailing list