[clang-tools-extra] r321272 - [clangd] Index symbols share storage within a slab.

Sam McCall via cfe-commits cfe-commits at lists.llvm.org
Thu Dec 21 06:58:44 PST 2017


Author: sammccall
Date: Thu Dec 21 06:58:44 2017
New Revision: 321272

URL: http://llvm.org/viewvc/llvm-project?rev=321272&view=rev
Log:
[clangd] Index symbols share storage within a slab.

Summary:
Symbols are not self-contained - it's only safe to hand them out if you
guarantee the lifetime of the underlying data.

Before this lands, I'm going to measure the before/after memory usage of the
LLVM index loaded into memory in a single slab.

Reviewers: hokein

Subscribers: klimek, ilya-biryukov, cfe-commits

Differential Revision: https://reviews.llvm.org/D41483

Modified:
    clang-tools-extra/trunk/clangd/index/Index.cpp
    clang-tools-extra/trunk/clangd/index/Index.h
    clang-tools-extra/trunk/unittests/clangd/FileIndexTests.cpp
    clang-tools-extra/trunk/unittests/clangd/IndexTests.cpp
    clang-tools-extra/trunk/unittests/clangd/SymbolCollectorTests.cpp

Modified: clang-tools-extra/trunk/clangd/index/Index.cpp
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/index/Index.cpp?rev=321272&r1=321271&r2=321272&view=diff
==============================================================================
--- clang-tools-extra/trunk/clangd/index/Index.cpp (original)
+++ clang-tools-extra/trunk/clangd/index/Index.cpp Thu Dec 21 06:58:44 2017
@@ -38,9 +38,17 @@ SymbolSlab::const_iterator SymbolSlab::f
 
 void SymbolSlab::freeze() { Frozen = true; }
 
-void SymbolSlab::insert(Symbol S) {
+void SymbolSlab::insert(const Symbol &S) {
   assert(!Frozen && "Can't insert a symbol after the slab has been frozen!");
-  Symbols[S.ID] = std::move(S);
+  auto ItInserted = Symbols.try_emplace(S.ID, S);
+  if (!ItInserted.second)
+    return;
+  auto &Sym = ItInserted.first->second;
+
+  // We inserted a new symbol, so copy the underlying data.
+  intern(Sym.Name);
+  intern(Sym.Scope);
+  intern(Sym.CanonicalDeclaration.FilePath);
 }
 
 } // namespace clangd

Modified: clang-tools-extra/trunk/clangd/index/Index.h
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/index/Index.h?rev=321272&r1=321271&r2=321272&view=diff
==============================================================================
--- clang-tools-extra/trunk/clangd/index/Index.h (original)
+++ clang-tools-extra/trunk/clangd/index/Index.h Thu Dec 21 06:58:44 2017
@@ -15,6 +15,7 @@
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/Hashing.h"
 #include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringSet.h"
 #include <array>
 #include <string>
 
@@ -23,7 +24,7 @@ namespace clangd {
 
 struct SymbolLocation {
   // The absolute path of the source file where a symbol occurs.
-  std::string FilePath;
+  llvm::StringRef FilePath;
   // The 0-based offset to the first character of the symbol from the beginning
   // of the source file.
   unsigned StartOffset;
@@ -71,16 +72,19 @@ void operator>>(llvm::StringRef HexStr,
 
 // The class presents a C++ symbol, e.g. class, function.
 //
-// FIXME: instead of having own copy fields for each symbol, we can share
-// storage from SymbolSlab.
+// WARNING: Symbols do not own much of their underlying data - typically strings
+// are owned by a SymbolSlab. They should be treated as non-owning references.
+// Copies are shallow.
+// When adding new unowned data fields to Symbol, remember to update
+// SymbolSlab::insert to copy them to the slab's storage.
 struct Symbol {
   // The ID of the symbol.
   SymbolID ID;
   // The unqualified name of the symbol, e.g. "bar" (for "n1::n2::bar").
-  std::string Name;
+  llvm::StringRef Name;
   // The scope (e.g. namespace) of the symbol, e.g. "n1::n2" (for
   // "n1::n2::bar").
-  std::string Scope;
+  llvm::StringRef Scope;
   // The symbol information, like symbol kind.
   index::SymbolInfo SymInfo;
   // The location of the canonical declaration of the symbol.
@@ -100,9 +104,6 @@ struct Symbol {
 
 // A symbol container that stores a set of symbols. The container will maintain
 // the lifetime of the symbols.
-//
-// FIXME: Use a space-efficient implementation, a lot of Symbol fields could
-// share the same storage.
 class SymbolSlab {
 public:
   using const_iterator = llvm::DenseMap<SymbolID, Symbol>::const_iterator;
@@ -117,11 +118,21 @@ public:
   // operation is irreversible.
   void freeze();
 
-  void insert(Symbol S);
+  // Adds the symbol to this slab.
+  // This is a deep copy: underlying strings will be owned by the slab.
+  void insert(const Symbol& S);
 
 private:
+  // Replaces S with a reference to the same string, owned by this slab.
+  void intern(llvm::StringRef &S) {
+    S = S.empty() ? llvm::StringRef() : Strings.insert(S).first->getKey();
+  }
+
   bool Frozen = false;
 
+  // Intern table for strings. Not StringPool as we don't refcount, just insert.
+  // We use BumpPtrAllocator to avoid lots of tiny allocations for nodes.
+  llvm::StringSet<llvm::BumpPtrAllocator> Strings;
   llvm::DenseMap<SymbolID, Symbol> Symbols;
 };
 

Modified: clang-tools-extra/trunk/unittests/clangd/FileIndexTests.cpp
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/unittests/clangd/FileIndexTests.cpp?rev=321272&r1=321271&r2=321272&view=diff
==============================================================================
--- clang-tools-extra/trunk/unittests/clangd/FileIndexTests.cpp (original)
+++ clang-tools-extra/trunk/unittests/clangd/FileIndexTests.cpp Thu Dec 21 06:58:44 2017
@@ -93,7 +93,8 @@ std::vector<std::string> match(const Sym
   std::vector<std::string> Matches;
   auto Ctx = Context::empty();
   I.fuzzyFind(Ctx, Req, [&](const Symbol &Sym) {
-    Matches.push_back(Sym.Scope + (Sym.Scope.empty() ? "" : "::") + Sym.Name);
+    Matches.push_back(
+        (Sym.Scope + (Sym.Scope.empty() ? "" : "::") + Sym.Name).str());
   });
   return Matches;
 }

Modified: clang-tools-extra/trunk/unittests/clangd/IndexTests.cpp
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/unittests/clangd/IndexTests.cpp?rev=321272&r1=321271&r2=321272&view=diff
==============================================================================
--- clang-tools-extra/trunk/unittests/clangd/IndexTests.cpp (original)
+++ clang-tools-extra/trunk/unittests/clangd/IndexTests.cpp Thu Dec 21 06:58:44 2017
@@ -75,7 +75,8 @@ std::vector<std::string> match(const Sym
   std::vector<std::string> Matches;
   auto Ctx = Context::empty();
   I.fuzzyFind(Ctx, Req, [&](const Symbol &Sym) {
-    Matches.push_back(Sym.Scope + (Sym.Scope.empty() ? "" : "::") + Sym.Name);
+    Matches.push_back(
+        (Sym.Scope + (Sym.Scope.empty() ? "" : "::") + Sym.Name).str());
   });
   return Matches;
 }

Modified: clang-tools-extra/trunk/unittests/clangd/SymbolCollectorTests.cpp
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/unittests/clangd/SymbolCollectorTests.cpp?rev=321272&r1=321271&r2=321272&view=diff
==============================================================================
--- clang-tools-extra/trunk/unittests/clangd/SymbolCollectorTests.cpp (original)
+++ clang-tools-extra/trunk/unittests/clangd/SymbolCollectorTests.cpp Thu Dec 21 06:58:44 2017
@@ -33,7 +33,7 @@ using testing::UnorderedElementsAre;
 // GMock helpers for matching Symbol.
 MATCHER_P(QName, Name, "") {
   return (arg.second.Scope + (arg.second.Scope.empty() ? "" : "::") +
-          arg.second.Name) == Name;
+          arg.second.Name).str() == Name;
 }
 
 namespace clang {




More information about the cfe-commits mailing list