[clang-tools-extra] r345113 - [clangd] Truncate SymbolID to 16 bytes.

Sam McCall via cfe-commits cfe-commits at lists.llvm.org
Tue Oct 23 23:58:42 PDT 2018


Author: sammccall
Date: Tue Oct 23 23:58:42 2018
New Revision: 345113

URL: http://llvm.org/viewvc/llvm-project?rev=345113&view=rev
Log:
[clangd] Truncate SymbolID to 16 bytes.

Summary:
The eventual goal is 8 bytes, which has a nonzero risk of collisions with huge
indexes. As a first step, this patch truncates to 16 bytes, which should shake
out any issues with truncation itself; we can lower the size further later.

Reviewers: ioeric

Subscribers: ilya-biryukov, MaskRay, jkorous, arphaman, kadircet, cfe-commits

Differential Revision: https://reviews.llvm.org/D53587

Modified:
    clang-tools-extra/trunk/clangd/index/Index.cpp
    clang-tools-extra/trunk/clangd/index/Index.h
    clang-tools-extra/trunk/clangd/index/Serialization.cpp
    clang-tools-extra/trunk/unittests/clangd/SerializationTests.cpp

Modified: clang-tools-extra/trunk/clangd/index/Index.cpp
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/index/Index.cpp?rev=345113&r1=345112&r2=345113&view=diff
==============================================================================
--- clang-tools-extra/trunk/clangd/index/Index.cpp (original)
+++ clang-tools-extra/trunk/clangd/index/Index.cpp Tue Oct 23 23:58:42 2018
@@ -43,8 +43,11 @@ raw_ostream &operator<<(raw_ostream &OS,
             << "-" << L.End.line() << ":" << L.End.column() << ")";
 }
 
-SymbolID::SymbolID(StringRef USR)
-    : HashValue(SHA1::hash(arrayRefFromStringRef(USR))) {}
+SymbolID::SymbolID(StringRef USR) {
+  auto Hash = SHA1::hash(arrayRefFromStringRef(USR));
+  static_assert(sizeof(Hash) >= RawSize, "RawSize larger than SHA1");
+  memcpy(HashValue.data(), Hash.data(), RawSize);
+}
 
 raw_ostream &operator<<(raw_ostream &OS, const SymbolID &ID) {
   return OS << toHex(ID.raw());

Modified: clang-tools-extra/trunk/clangd/index/Index.h
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/index/Index.h?rev=345113&r1=345112&r2=345113&view=diff
==============================================================================
--- clang-tools-extra/trunk/clangd/index/Index.h (original)
+++ clang-tools-extra/trunk/clangd/index/Index.h Tue Oct 23 23:58:42 2018
@@ -89,7 +89,7 @@ llvm::raw_ostream &operator<<(llvm::raw_
 // The class identifies a particular C++ symbol (class, function, method, etc).
 //
 // As USRs (Unified Symbol Resolution) could be large, especially for functions
-// with long type arguments, SymbolID is using 160-bits SHA1(USR) values to
+// with long type arguments, SymbolID is using truncated SHA1(USR) values to
 // guarantee the uniqueness of symbols while using a relatively small amount of
 // memory (vs storing USRs directly).
 //
@@ -106,13 +106,16 @@ public:
     return HashValue < Sym.HashValue;
   }
 
-  constexpr static size_t RawSize = 20;
+  // The stored hash is truncated to RawSize bytes.
+  // This trades off memory against the number of symbols we can handle.
+  // FIXME: can we reduce this further to 8 bytes?
+  constexpr static size_t RawSize = 16;
   llvm::StringRef raw() const {
     return StringRef(reinterpret_cast<const char *>(HashValue.data()), RawSize);
   }
   static SymbolID fromRaw(llvm::StringRef);
 
-  // Returns a 40-bytes hex encoded string.
+  // Returns a hex encoded string.
   std::string str() const;
   static llvm::Expected<SymbolID> fromStr(llvm::StringRef);
 

Modified: clang-tools-extra/trunk/clangd/index/Serialization.cpp
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/index/Serialization.cpp?rev=345113&r1=345112&r2=345113&view=diff
==============================================================================
--- clang-tools-extra/trunk/clangd/index/Serialization.cpp (original)
+++ clang-tools-extra/trunk/clangd/index/Serialization.cpp Tue Oct 23 23:58:42 2018
@@ -300,7 +300,7 @@ Symbol readSymbol(Reader &Data, ArrayRef
 
 // REFS ENCODING
 // A refs section has data grouped by Symbol. Each symbol has:
-//  - SymbolID: 20 bytes
+//  - SymbolID: 16 bytes
 //  - NumRefs: varint
 //  - Ref[NumRefs]
 // Fields of Ref are encoded in turn, see implementation.
@@ -338,7 +338,7 @@ std::pair<SymbolID, std::vector<Ref>> re
 // The current versioning scheme is simple - non-current versions are rejected.
 // If you make a breaking change, bump this version number to invalidate stored
 // data. Later we may want to support some backward compatibility.
-constexpr static uint32_t Version = 5;
+constexpr static uint32_t Version = 6;
 
 Expected<IndexFileIn> readRIFF(StringRef Data) {
   auto RIFF = riff::readFile(Data);

Modified: clang-tools-extra/trunk/unittests/clangd/SerializationTests.cpp
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/unittests/clangd/SerializationTests.cpp?rev=345113&r1=345112&r2=345113&view=diff
==============================================================================
--- clang-tools-extra/trunk/unittests/clangd/SerializationTests.cpp (original)
+++ clang-tools-extra/trunk/unittests/clangd/SerializationTests.cpp Tue Oct 23 23:58:42 2018
@@ -27,7 +27,7 @@ namespace {
 const char *YAML = R"(
 ---
 !Symbol
-ID: 057557CEBF6E6B2DD437FBF60CC58F352D1DF856
+ID: 057557CEBF6E6B2DD437FBF60CC58F35
 Name:   'Foo1'
 Scope:   'clang::'
 SymInfo:
@@ -53,7 +53,7 @@ IncludeHeaders:
 ...
 ---
 !Symbol
-ID: 057557CEBF6E6B2DD437FBF60CC58F352D1DF858
+ID: 057557CEBF6E6B2DD437FBF60CC58F36
 Name:   'Foo2'
 Scope:   'clang::'
 SymInfo:
@@ -72,7 +72,7 @@ Signature:    '-sig'
 CompletionSnippetSuffix:    '-snippet'
 ...
 !Refs
-ID: 057557CEBF6E6B2DD437FBF60CC58F352D1DF856
+ID: 057557CEBF6E6B2DD437FBF60CC58F35
 References:
   - Kind: 4
     Location:
@@ -98,15 +98,14 @@ TEST(SerializationTest, YAMLConversions)
   auto ParsedYAML = readIndexFile(YAML);
   ASSERT_TRUE(bool(ParsedYAML)) << ParsedYAML.takeError();
   ASSERT_TRUE(bool(ParsedYAML->Symbols));
-  EXPECT_THAT(
-      *ParsedYAML->Symbols,
-      UnorderedElementsAre(ID("057557CEBF6E6B2DD437FBF60CC58F352D1DF856"),
-                           ID("057557CEBF6E6B2DD437FBF60CC58F352D1DF858")));
+  EXPECT_THAT(*ParsedYAML->Symbols,
+              UnorderedElementsAre(ID("057557CEBF6E6B2DD437FBF60CC58F35"),
+                                   ID("057557CEBF6E6B2DD437FBF60CC58F36")));
 
   auto Sym1 = *ParsedYAML->Symbols->find(
-      cantFail(SymbolID::fromStr("057557CEBF6E6B2DD437FBF60CC58F352D1DF856")));
+      cantFail(SymbolID::fromStr("057557CEBF6E6B2DD437FBF60CC58F35")));
   auto Sym2 = *ParsedYAML->Symbols->find(
-      cantFail(SymbolID::fromStr("057557CEBF6E6B2DD437FBF60CC58F352D1DF858")));
+      cantFail(SymbolID::fromStr("057557CEBF6E6B2DD437FBF60CC58F36")));
 
   EXPECT_THAT(Sym1, QName("clang::Foo1"));
   EXPECT_EQ(Sym1.Signature, "");
@@ -128,11 +127,11 @@ TEST(SerializationTest, YAMLConversions)
   EXPECT_TRUE(Sym2.Flags & Symbol::Deprecated);
 
   ASSERT_TRUE(bool(ParsedYAML->Refs));
-  EXPECT_THAT(*ParsedYAML->Refs,
-              UnorderedElementsAre(
-                  Pair(cantFail(SymbolID::fromStr(
-                           "057557CEBF6E6B2DD437FBF60CC58F352D1DF856")),
-                       testing::SizeIs(1))));
+  EXPECT_THAT(
+      *ParsedYAML->Refs,
+      UnorderedElementsAre(
+          Pair(cantFail(SymbolID::fromStr("057557CEBF6E6B2DD437FBF60CC58F35")),
+               testing::SizeIs(1))));
   auto Ref1 = ParsedYAML->Refs->begin()->second.front();
   EXPECT_EQ(Ref1.Kind, RefKind::Reference);
   EXPECT_EQ(Ref1.Location.FileURI, "file:///path/foo.cc");




More information about the cfe-commits mailing list