[clang-tools-extra] r345113 - [clangd] Truncate SymbolID to 16 bytes.
Sam McCall via cfe-commits
cfe-commits at lists.llvm.org
Tue Oct 23 23:58:42 PDT 2018
Author: sammccall
Date: Tue Oct 23 23:58:42 2018
New Revision: 345113
URL: http://llvm.org/viewvc/llvm-project?rev=345113&view=rev
Log:
[clangd] Truncate SymbolID to 16 bytes.
Summary:
The eventual goal is 8 bytes, which has a nonzero risk of collisions with huge indexes.
This patch should shake out any issues with truncation itself; we can lower the size
further later.
Reviewers: ioeric
Subscribers: ilya-biryukov, MaskRay, jkorous, arphaman, kadircet, cfe-commits
Differential Revision: https://reviews.llvm.org/D53587
Modified:
clang-tools-extra/trunk/clangd/index/Index.cpp
clang-tools-extra/trunk/clangd/index/Index.h
clang-tools-extra/trunk/clangd/index/Serialization.cpp
clang-tools-extra/trunk/unittests/clangd/SerializationTests.cpp
Modified: clang-tools-extra/trunk/clangd/index/Index.cpp
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/index/Index.cpp?rev=345113&r1=345112&r2=345113&view=diff
==============================================================================
--- clang-tools-extra/trunk/clangd/index/Index.cpp (original)
+++ clang-tools-extra/trunk/clangd/index/Index.cpp Tue Oct 23 23:58:42 2018
@@ -43,8 +43,11 @@ raw_ostream &operator<<(raw_ostream &OS,
<< "-" << L.End.line() << ":" << L.End.column() << ")";
}
-SymbolID::SymbolID(StringRef USR)
- : HashValue(SHA1::hash(arrayRefFromStringRef(USR))) {}
+SymbolID::SymbolID(StringRef USR) {
+ auto Hash = SHA1::hash(arrayRefFromStringRef(USR));
+ static_assert(sizeof(Hash) >= RawSize, "RawSize larger than SHA1");
+ memcpy(HashValue.data(), Hash.data(), RawSize);
+}
raw_ostream &operator<<(raw_ostream &OS, const SymbolID &ID) {
return OS << toHex(ID.raw());
Modified: clang-tools-extra/trunk/clangd/index/Index.h
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/index/Index.h?rev=345113&r1=345112&r2=345113&view=diff
==============================================================================
--- clang-tools-extra/trunk/clangd/index/Index.h (original)
+++ clang-tools-extra/trunk/clangd/index/Index.h Tue Oct 23 23:58:42 2018
@@ -89,7 +89,7 @@ llvm::raw_ostream &operator<<(llvm::raw_
// The class identifies a particular C++ symbol (class, function, method, etc).
//
// As USRs (Unified Symbol Resolution) could be large, especially for functions
-// with long type arguments, SymbolID is using 160-bits SHA1(USR) values to
+// with long type arguments, SymbolID is using truncated SHA1(USR) values to
// guarantee the uniqueness of symbols while using a relatively small amount of
// memory (vs storing USRs directly).
//
@@ -106,13 +106,16 @@ public:
return HashValue < Sym.HashValue;
}
- constexpr static size_t RawSize = 20;
+ // The stored hash is truncated to RawSize bytes.
+ // This trades off memory against the number of symbols we can handle.
+ // FIXME: can we reduce this further to 8 bytes?
+ constexpr static size_t RawSize = 16;
llvm::StringRef raw() const {
return StringRef(reinterpret_cast<const char *>(HashValue.data()), RawSize);
}
static SymbolID fromRaw(llvm::StringRef);
- // Returns a 40-bytes hex encoded string.
+ // Returns a hex encoded string.
std::string str() const;
static llvm::Expected<SymbolID> fromStr(llvm::StringRef);
Modified: clang-tools-extra/trunk/clangd/index/Serialization.cpp
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/index/Serialization.cpp?rev=345113&r1=345112&r2=345113&view=diff
==============================================================================
--- clang-tools-extra/trunk/clangd/index/Serialization.cpp (original)
+++ clang-tools-extra/trunk/clangd/index/Serialization.cpp Tue Oct 23 23:58:42 2018
@@ -300,7 +300,7 @@ Symbol readSymbol(Reader &Data, ArrayRef
// REFS ENCODING
// A refs section has data grouped by Symbol. Each symbol has:
-// - SymbolID: 20 bytes
+// - SymbolID: 16 bytes
// - NumRefs: varint
// - Ref[NumRefs]
// Fields of Ref are encoded in turn, see implementation.
@@ -338,7 +338,7 @@ std::pair<SymbolID, std::vector<Ref>> re
// The current versioning scheme is simple - non-current versions are rejected.
// If you make a breaking change, bump this version number to invalidate stored
// data. Later we may want to support some backward compatibility.
-constexpr static uint32_t Version = 5;
+constexpr static uint32_t Version = 6;
Expected<IndexFileIn> readRIFF(StringRef Data) {
auto RIFF = riff::readFile(Data);
Modified: clang-tools-extra/trunk/unittests/clangd/SerializationTests.cpp
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/unittests/clangd/SerializationTests.cpp?rev=345113&r1=345112&r2=345113&view=diff
==============================================================================
--- clang-tools-extra/trunk/unittests/clangd/SerializationTests.cpp (original)
+++ clang-tools-extra/trunk/unittests/clangd/SerializationTests.cpp Tue Oct 23 23:58:42 2018
@@ -27,7 +27,7 @@ namespace {
const char *YAML = R"(
---
!Symbol
-ID: 057557CEBF6E6B2DD437FBF60CC58F352D1DF856
+ID: 057557CEBF6E6B2DD437FBF60CC58F35
Name: 'Foo1'
Scope: 'clang::'
SymInfo:
@@ -53,7 +53,7 @@ IncludeHeaders:
...
---
!Symbol
-ID: 057557CEBF6E6B2DD437FBF60CC58F352D1DF858
+ID: 057557CEBF6E6B2DD437FBF60CC58F36
Name: 'Foo2'
Scope: 'clang::'
SymInfo:
@@ -72,7 +72,7 @@ Signature: '-sig'
CompletionSnippetSuffix: '-snippet'
...
!Refs
-ID: 057557CEBF6E6B2DD437FBF60CC58F352D1DF856
+ID: 057557CEBF6E6B2DD437FBF60CC58F35
References:
- Kind: 4
Location:
@@ -98,15 +98,14 @@ TEST(SerializationTest, YAMLConversions)
auto ParsedYAML = readIndexFile(YAML);
ASSERT_TRUE(bool(ParsedYAML)) << ParsedYAML.takeError();
ASSERT_TRUE(bool(ParsedYAML->Symbols));
- EXPECT_THAT(
- *ParsedYAML->Symbols,
- UnorderedElementsAre(ID("057557CEBF6E6B2DD437FBF60CC58F352D1DF856"),
- ID("057557CEBF6E6B2DD437FBF60CC58F352D1DF858")));
+ EXPECT_THAT(*ParsedYAML->Symbols,
+ UnorderedElementsAre(ID("057557CEBF6E6B2DD437FBF60CC58F35"),
+ ID("057557CEBF6E6B2DD437FBF60CC58F36")));
auto Sym1 = *ParsedYAML->Symbols->find(
- cantFail(SymbolID::fromStr("057557CEBF6E6B2DD437FBF60CC58F352D1DF856")));
+ cantFail(SymbolID::fromStr("057557CEBF6E6B2DD437FBF60CC58F35")));
auto Sym2 = *ParsedYAML->Symbols->find(
- cantFail(SymbolID::fromStr("057557CEBF6E6B2DD437FBF60CC58F352D1DF858")));
+ cantFail(SymbolID::fromStr("057557CEBF6E6B2DD437FBF60CC58F36")));
EXPECT_THAT(Sym1, QName("clang::Foo1"));
EXPECT_EQ(Sym1.Signature, "");
@@ -128,11 +127,11 @@ TEST(SerializationTest, YAMLConversions)
EXPECT_TRUE(Sym2.Flags & Symbol::Deprecated);
ASSERT_TRUE(bool(ParsedYAML->Refs));
- EXPECT_THAT(*ParsedYAML->Refs,
- UnorderedElementsAre(
- Pair(cantFail(SymbolID::fromStr(
- "057557CEBF6E6B2DD437FBF60CC58F352D1DF856")),
- testing::SizeIs(1))));
+ EXPECT_THAT(
+ *ParsedYAML->Refs,
+ UnorderedElementsAre(
+ Pair(cantFail(SymbolID::fromStr("057557CEBF6E6B2DD437FBF60CC58F35")),
+ testing::SizeIs(1))));
auto Ref1 = ParsedYAML->Refs->begin()->second.front();
EXPECT_EQ(Ref1.Kind, RefKind::Reference);
EXPECT_EQ(Ref1.Location.FileURI, "file:///path/foo.cc");
More information about the cfe-commits mailing list