[clang-tools-extra] r365311 - [clangd] Use xxhash instead of SHA1 for background index file digests.

Sam McCall via cfe-commits cfe-commits at lists.llvm.org
Mon Jul 8 04:33:19 PDT 2019


Author: sammccall
Date: Mon Jul  8 04:33:17 2019
New Revision: 365311

URL: http://llvm.org/viewvc/llvm-project?rev=365311&view=rev
Log:
[clangd] Use xxhash instead of SHA1 for background index file digests.

Summary:
Currently SHA1 is about 10% of our CPU, this patch reduces it to ~1%.

xxhash is a well-defined (stable) non-cryptographic hash optimized for
fast checksums (like crc32).
Collisions shouldn't be a problem, despite the reduced length:
 - for actual file content (used to invalidate bg index shards), there
   are only two versions that can collide (new shard and old shard).
 - for file paths in bg index shard filenames, we would need 2^32 files
   with the same filename to expect a collision. Imperfect hashing may
   reduce this a bit but it's well beyond what's plausible.

This will invalidate shards on disk (as usual; I bumped the version),
but this time the filenames are changing so the old files will stick
around :-( So this is more expensive than the usual bump, but would be
good to land before the v9 branch when everyone will start using bg index.

Reviewers: kadircet

Subscribers: ilya-biryukov, MaskRay, jkorous, arphaman, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D64306

Modified:
    clang-tools-extra/trunk/clangd/SourceCode.cpp
    clang-tools-extra/trunk/clangd/SourceCode.h
    clang-tools-extra/trunk/clangd/index/Background.cpp
    clang-tools-extra/trunk/clangd/index/Background.h
    clang-tools-extra/trunk/clangd/index/BackgroundIndexStorage.cpp
    clang-tools-extra/trunk/clangd/index/Serialization.cpp
    clang-tools-extra/trunk/clangd/unittests/SerializationTests.cpp

Modified: clang-tools-extra/trunk/clangd/SourceCode.cpp
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/SourceCode.cpp?rev=365311&r1=365310&r2=365311&view=diff
==============================================================================
--- clang-tools-extra/trunk/clangd/SourceCode.cpp (original)
+++ clang-tools-extra/trunk/clangd/SourceCode.cpp Mon Jul  8 04:33:17 2019
@@ -25,6 +25,7 @@
 #include "llvm/Support/Error.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/Path.h"
+#include "llvm/Support/xxhash.h"
 #include <algorithm>
 
 namespace clang {
@@ -376,7 +377,13 @@ bool isRangeConsecutive(const Range &Lef
 }
 
 FileDigest digest(llvm::StringRef Content) {
-  return llvm::SHA1::hash({(const uint8_t *)Content.data(), Content.size()});
+  uint64_t Hash{llvm::xxHash64(Content)};
+  FileDigest Result;
+  for (unsigned I = 0; I < Result.size(); ++I) {
+    Result[I] = uint8_t(Hash);
+    Hash >>= 8;
+  }
+  return Result;
 }
 
 llvm::Optional<FileDigest> digestFile(const SourceManager &SM, FileID FID) {

Modified: clang-tools-extra/trunk/clangd/SourceCode.h
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/SourceCode.h?rev=365311&r1=365310&r2=365311&view=diff
==============================================================================
--- clang-tools-extra/trunk/clangd/SourceCode.h (original)
+++ clang-tools-extra/trunk/clangd/SourceCode.h Mon Jul  8 04:33:17 2019
@@ -22,7 +22,6 @@
 #include "clang/Tooling/Core/Replacement.h"
 #include "llvm/ADT/StringSet.h"
 #include "llvm/ADT/StringRef.h"
-#include "llvm/Support/SHA1.h"
 
 namespace clang {
 class SourceManager;
@@ -32,7 +31,7 @@ namespace clangd {
 // We tend to generate digests for source codes in a lot of different places.
 // This represents the type for those digests to prevent us hard coding details
 // of hashing function at every place that needs to store this information.
-using FileDigest = decltype(llvm::SHA1::hash({}));
+using FileDigest = std::array<uint8_t, 8>;
 FileDigest digest(StringRef Content);
 Optional<FileDigest> digestFile(const SourceManager &SM, FileID FID);
 

Modified: clang-tools-extra/trunk/clangd/index/Background.cpp
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/index/Background.cpp?rev=365311&r1=365310&r2=365311&view=diff
==============================================================================
--- clang-tools-extra/trunk/clangd/index/Background.cpp (original)
+++ clang-tools-extra/trunk/clangd/index/Background.cpp Mon Jul  8 04:33:17 2019
@@ -32,7 +32,6 @@
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/StringSet.h"
 #include "llvm/Support/Error.h"
-#include "llvm/Support/SHA1.h"
 #include "llvm/Support/Threading.h"
 
 #include <atomic>

Modified: clang-tools-extra/trunk/clangd/index/Background.h
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/index/Background.h?rev=365311&r1=365310&r2=365311&view=diff
==============================================================================
--- clang-tools-extra/trunk/clangd/index/Background.h (original)
+++ clang-tools-extra/trunk/clangd/index/Background.h Mon Jul  8 04:33:17 2019
@@ -19,7 +19,6 @@
 #include "index/Serialization.h"
 #include "clang/Tooling/CompilationDatabase.h"
 #include "llvm/ADT/StringMap.h"
-#include "llvm/Support/SHA1.h"
 #include "llvm/Support/Threading.h"
 #include <atomic>
 #include <condition_variable>

Modified: clang-tools-extra/trunk/clangd/index/BackgroundIndexStorage.cpp
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/index/BackgroundIndexStorage.cpp?rev=365311&r1=365310&r2=365311&view=diff
==============================================================================
--- clang-tools-extra/trunk/clangd/index/BackgroundIndexStorage.cpp (original)
+++ clang-tools-extra/trunk/clangd/index/BackgroundIndexStorage.cpp Mon Jul  8 04:33:17 2019
@@ -13,18 +13,11 @@
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/Path.h"
-#include "llvm/Support/SHA1.h"
 
 namespace clang {
 namespace clangd {
 namespace {
 
-using FileDigest = decltype(llvm::SHA1::hash({}));
-
-static FileDigest digest(StringRef Content) {
-  return llvm::SHA1::hash({(const uint8_t *)Content.data(), Content.size()});
-}
-
 std::string getShardPathFromFilePath(llvm::StringRef ShardRoot,
                                      llvm::StringRef FilePath) {
   llvm::SmallString<128> ShardRootSS(ShardRoot);

Modified: clang-tools-extra/trunk/clangd/index/Serialization.cpp
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/index/Serialization.cpp?rev=365311&r1=365310&r2=365311&view=diff
==============================================================================
--- clang-tools-extra/trunk/clangd/index/Serialization.cpp (original)
+++ clang-tools-extra/trunk/clangd/index/Serialization.cpp Mon Jul  8 04:33:17 2019
@@ -444,7 +444,7 @@ readCompileCommand(Reader CmdReader, llv
 // The current versioning scheme is simple - non-current versions are rejected.
 // If you make a breaking change, bump this version number to invalidate stored
 // data. Later we may want to support some backward compatibility.
-constexpr static uint32_t Version = 11;
+constexpr static uint32_t Version = 12;
 
 llvm::Expected<IndexFileIn> readRIFF(llvm::StringRef Data) {
   auto RIFF = riff::readFile(Data);

Modified: clang-tools-extra/trunk/clangd/unittests/SerializationTests.cpp
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/unittests/SerializationTests.cpp?rev=365311&r1=365310&r2=365311&view=diff
==============================================================================
--- clang-tools-extra/trunk/clangd/unittests/SerializationTests.cpp (original)
+++ clang-tools-extra/trunk/clangd/unittests/SerializationTests.cpp Mon Jul  8 04:33:17 2019
@@ -10,7 +10,6 @@
 #include "index/Index.h"
 #include "index/Serialization.h"
 #include "clang/Tooling/CompilationDatabase.h"
-#include "llvm/Support/SHA1.h"
 #include "llvm/Support/ScopedPrinter.h"
 #include "gmock/gmock.h"
 #include "gtest/gtest.h"
@@ -208,9 +207,7 @@ TEST(SerializationTest, SrcsTest) {
 
   std::string TestContent("TestContent");
   IncludeGraphNode IGN;
-  IGN.Digest =
-      llvm::SHA1::hash({reinterpret_cast<const uint8_t *>(TestContent.data()),
-                        TestContent.size()});
+  IGN.Digest = digest(TestContent);
   IGN.DirectIncludes = {"inc1", "inc2"};
   IGN.URI = "URI";
   IGN.Flags |= IncludeGraphNode::SourceFlag::IsTU;




More information about the cfe-commits mailing list