[clang-tools-extra] r365311 - [clangd] Use xxhash instead of SHA1 for background index file digests.
Sam McCall via cfe-commits
cfe-commits at lists.llvm.org
Mon Jul 8 04:33:19 PDT 2019
Author: sammccall
Date: Mon Jul 8 04:33:17 2019
New Revision: 365311
URL: http://llvm.org/viewvc/llvm-project?rev=365311&view=rev
Log:
[clangd] Use xxhash instead of SHA1 for background index file digests.
Summary:
Currently SHA1 accounts for about 10% of our CPU usage; this patch reduces it to ~1%.
xxhash is a well-defined (stable) non-cryptographic hash optimized for
fast checksums (like crc32).
Collisions shouldn't be a problem, despite the reduced length:
- for actual file content (used to invalidate bg index shards), there
are only two versions that can collide (new shard and old shard).
- for file paths in bg index shard filenames, we would need 2^32 files
with the same filename to expect a collision. Imperfect hashing may
reduce this a bit but it's well beyond what's plausible.
This will invalidate shards on disk (as usual; I bumped the version),
but this time the filenames are changing, so the old files will stick
around :-( So this is more expensive than the usual bump, but it would be
good to land before the v9 branch, when everyone will start using bg index.
Reviewers: kadircet
Subscribers: ilya-biryukov, MaskRay, jkorous, arphaman, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D64306
Modified:
clang-tools-extra/trunk/clangd/SourceCode.cpp
clang-tools-extra/trunk/clangd/SourceCode.h
clang-tools-extra/trunk/clangd/index/Background.cpp
clang-tools-extra/trunk/clangd/index/Background.h
clang-tools-extra/trunk/clangd/index/BackgroundIndexStorage.cpp
clang-tools-extra/trunk/clangd/index/Serialization.cpp
clang-tools-extra/trunk/clangd/unittests/SerializationTests.cpp
Modified: clang-tools-extra/trunk/clangd/SourceCode.cpp
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/SourceCode.cpp?rev=365311&r1=365310&r2=365311&view=diff
==============================================================================
--- clang-tools-extra/trunk/clangd/SourceCode.cpp (original)
+++ clang-tools-extra/trunk/clangd/SourceCode.cpp Mon Jul 8 04:33:17 2019
@@ -25,6 +25,7 @@
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Path.h"
+#include "llvm/Support/xxhash.h"
#include <algorithm>
namespace clang {
@@ -376,7 +377,13 @@ bool isRangeConsecutive(const Range &Lef
}
FileDigest digest(llvm::StringRef Content) {
- return llvm::SHA1::hash({(const uint8_t *)Content.data(), Content.size()});
+ uint64_t Hash{llvm::xxHash64(Content)};
+ FileDigest Result;
+ for (unsigned I = 0; I < Result.size(); ++I) {
+ Result[I] = uint8_t(Hash);
+ Hash >>= 8;
+ }
+ return Result;
}
llvm::Optional<FileDigest> digestFile(const SourceManager &SM, FileID FID) {
Modified: clang-tools-extra/trunk/clangd/SourceCode.h
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/SourceCode.h?rev=365311&r1=365310&r2=365311&view=diff
==============================================================================
--- clang-tools-extra/trunk/clangd/SourceCode.h (original)
+++ clang-tools-extra/trunk/clangd/SourceCode.h Mon Jul 8 04:33:17 2019
@@ -22,7 +22,6 @@
#include "clang/Tooling/Core/Replacement.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/Support/SHA1.h"
namespace clang {
class SourceManager;
@@ -32,7 +31,7 @@ namespace clangd {
// We tend to generate digests for source codes in a lot of different places.
// This represents the type for those digests to prevent us hard coding details
// of hashing function at every place that needs to store this information.
-using FileDigest = decltype(llvm::SHA1::hash({}));
+using FileDigest = std::array<uint8_t, 8>;
FileDigest digest(StringRef Content);
Optional<FileDigest> digestFile(const SourceManager &SM, FileID FID);
Modified: clang-tools-extra/trunk/clangd/index/Background.cpp
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/index/Background.cpp?rev=365311&r1=365310&r2=365311&view=diff
==============================================================================
--- clang-tools-extra/trunk/clangd/index/Background.cpp (original)
+++ clang-tools-extra/trunk/clangd/index/Background.cpp Mon Jul 8 04:33:17 2019
@@ -32,7 +32,6 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/Support/Error.h"
-#include "llvm/Support/SHA1.h"
#include "llvm/Support/Threading.h"
#include <atomic>
Modified: clang-tools-extra/trunk/clangd/index/Background.h
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/index/Background.h?rev=365311&r1=365310&r2=365311&view=diff
==============================================================================
--- clang-tools-extra/trunk/clangd/index/Background.h (original)
+++ clang-tools-extra/trunk/clangd/index/Background.h Mon Jul 8 04:33:17 2019
@@ -19,7 +19,6 @@
#include "index/Serialization.h"
#include "clang/Tooling/CompilationDatabase.h"
#include "llvm/ADT/StringMap.h"
-#include "llvm/Support/SHA1.h"
#include "llvm/Support/Threading.h"
#include <atomic>
#include <condition_variable>
Modified: clang-tools-extra/trunk/clangd/index/BackgroundIndexStorage.cpp
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/index/BackgroundIndexStorage.cpp?rev=365311&r1=365310&r2=365311&view=diff
==============================================================================
--- clang-tools-extra/trunk/clangd/index/BackgroundIndexStorage.cpp (original)
+++ clang-tools-extra/trunk/clangd/index/BackgroundIndexStorage.cpp Mon Jul 8 04:33:17 2019
@@ -13,18 +13,11 @@
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
-#include "llvm/Support/SHA1.h"
namespace clang {
namespace clangd {
namespace {
-using FileDigest = decltype(llvm::SHA1::hash({}));
-
-static FileDigest digest(StringRef Content) {
- return llvm::SHA1::hash({(const uint8_t *)Content.data(), Content.size()});
-}
-
std::string getShardPathFromFilePath(llvm::StringRef ShardRoot,
llvm::StringRef FilePath) {
llvm::SmallString<128> ShardRootSS(ShardRoot);
Modified: clang-tools-extra/trunk/clangd/index/Serialization.cpp
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/index/Serialization.cpp?rev=365311&r1=365310&r2=365311&view=diff
==============================================================================
--- clang-tools-extra/trunk/clangd/index/Serialization.cpp (original)
+++ clang-tools-extra/trunk/clangd/index/Serialization.cpp Mon Jul 8 04:33:17 2019
@@ -444,7 +444,7 @@ readCompileCommand(Reader CmdReader, llv
// The current versioning scheme is simple - non-current versions are rejected.
// If you make a breaking change, bump this version number to invalidate stored
// data. Later we may want to support some backward compatibility.
-constexpr static uint32_t Version = 11;
+constexpr static uint32_t Version = 12;
llvm::Expected<IndexFileIn> readRIFF(llvm::StringRef Data) {
auto RIFF = riff::readFile(Data);
Modified: clang-tools-extra/trunk/clangd/unittests/SerializationTests.cpp
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/unittests/SerializationTests.cpp?rev=365311&r1=365310&r2=365311&view=diff
==============================================================================
--- clang-tools-extra/trunk/clangd/unittests/SerializationTests.cpp (original)
+++ clang-tools-extra/trunk/clangd/unittests/SerializationTests.cpp Mon Jul 8 04:33:17 2019
@@ -10,7 +10,6 @@
#include "index/Index.h"
#include "index/Serialization.h"
#include "clang/Tooling/CompilationDatabase.h"
-#include "llvm/Support/SHA1.h"
#include "llvm/Support/ScopedPrinter.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
@@ -208,9 +207,7 @@ TEST(SerializationTest, SrcsTest) {
std::string TestContent("TestContent");
IncludeGraphNode IGN;
- IGN.Digest =
- llvm::SHA1::hash({reinterpret_cast<const uint8_t *>(TestContent.data()),
- TestContent.size()});
+ IGN.Digest = digest(TestContent);
IGN.DirectIncludes = {"inc1", "inc2"};
IGN.URI = "URI";
IGN.Flags |= IncludeGraphNode::SourceFlag::IsTU;
More information about the cfe-commits
mailing list