[clang-tools-extra] d19265b - [clangd] Avoid wasteful data structures in RefSlab::Builder

Sam McCall via cfe-commits cfe-commits at lists.llvm.org
Mon May 18 13:44:54 PDT 2020


Author: Sam McCall
Date: 2020-05-18T22:34:59+02:00
New Revision: d19265b31e65c84ad908abce6e7f6e6d15c22258

URL: https://github.com/llvm/llvm-project/commit/d19265b31e65c84ad908abce6e7f6e6d15c22258
DIFF: https://github.com/llvm/llvm-project/commit/d19265b31e65c84ad908abce6e7f6e6d15c22258.diff

LOG: [clangd] Avoid wasteful data structures in RefSlab::Builder

Summary: This is worth another 10% or so on InedxBenchmark.DexBuild.

Reviewers: kbobyrev

Subscribers: ilya-biryukov, MaskRay, jkorous, mgrang, arphaman, kadircet, usaxena95, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D79950

Added: 
    

Modified: 
    clang-tools-extra/clangd/index/Ref.cpp
    clang-tools-extra/clangd/index/Ref.h
    clang-tools-extra/clangd/index/SymbolLocation.cpp
    clang-tools-extra/clangd/index/SymbolLocation.h

Removed: 
    


################################################################################
diff  --git a/clang-tools-extra/clangd/index/Ref.cpp b/clang-tools-extra/clangd/index/Ref.cpp
index 115894ce6743..e622a3f78e42 100644
--- a/clang-tools-extra/clangd/index/Ref.cpp
+++ b/clang-tools-extra/clangd/index/Ref.cpp
@@ -7,6 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "Ref.h"
+#include "llvm/ADT/STLExtras.h"
 
 namespace clang {
 namespace clangd {
@@ -33,27 +34,34 @@ llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Ref &R) {
 }
 
 void RefSlab::Builder::insert(const SymbolID &ID, const Ref &S) {
-  auto &M = Refs[ID];
-  if (M.count(S))
-    return;
-  Ref R = S;
-  R.Location.FileURI =
-      UniqueStrings.save(R.Location.FileURI).data();
-  M.insert(std::move(R));
+  Entry E = {ID, S};
+  E.Reference.Location.FileURI = UniqueStrings.save(S.Location.FileURI).data();
+  Entries.insert(std::move(E));
 }
 
 RefSlab RefSlab::Builder::build() && {
-  // We can reuse the arena, as it only has unique strings and we need them all.
-  // Reallocate refs on the arena to reduce waste and indirections when reading.
   std::vector<std::pair<SymbolID, llvm::ArrayRef<Ref>>> Result;
-  Result.reserve(Refs.size());
-  size_t NumRefs = 0;
-  for (auto &Sym : Refs) {
-    std::vector<Ref> SymRefs(Sym.second.begin(), Sym.second.end());
-    NumRefs += SymRefs.size();
-    Result.emplace_back(Sym.first, llvm::ArrayRef<Ref>(SymRefs).copy(Arena));
+  // We'll reuse the arena, as it only has unique strings and we need them all.
+  // We need to group refs by symbol and form contiguous arrays on the arena.
+  std::vector<std::pair<SymbolID, const Ref *>> Flat;
+  Flat.reserve(Entries.size());
+  for (const Entry &E : Entries)
+    Flat.emplace_back(E.Symbol, &E.Reference);
+  // Group by SymbolID.
+  llvm::sort(Flat, llvm::less_first());
+  std::vector<Ref> Refs;
+  // Loop over symbols, copying refs for each onto the arena.
+  for (auto I = Flat.begin(), End = Flat.end(); I != End;) {
+    SymbolID Sym = I->first;
+    Refs.clear();
+    do {
+      Refs.push_back(*I->second);
+      ++I;
+    } while (I != End && I->first == Sym);
+    llvm::sort(Refs); // By file, affects xrefs display order.
+    Result.emplace_back(Sym, llvm::ArrayRef<Ref>(Refs).copy(Arena));
   }
-  return RefSlab(std::move(Result), std::move(Arena), NumRefs);
+  return RefSlab(std::move(Result), std::move(Arena), Entries.size());
 }
 
 } // namespace clangd

diff  --git a/clang-tools-extra/clangd/index/Ref.h b/clang-tools-extra/clangd/index/Ref.h
index 382ade712017..cba8adaee7c2 100644
--- a/clang-tools-extra/clangd/index/Ref.h
+++ b/clang-tools-extra/clangd/index/Ref.h
@@ -13,6 +13,8 @@
 #include "SymbolLocation.h"
 #include "clang/Index/IndexSymbol.h"
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Hashing.h"
+#include "llvm/Support/Allocator.h"
 #include "llvm/Support/StringSaver.h"
 #include "llvm/Support/raw_ostream.h"
 #include <cstdint>
@@ -132,9 +134,17 @@ class RefSlab {
     RefSlab build() &&;
 
   private:
+    // A ref we're storing with its symbol to consume with build().
+    // All strings are interned, so DenseMapInfo can use pointer comparisons.
+    struct Entry {
+      SymbolID Symbol;
+      Ref Reference;
+    };
+    friend struct llvm::DenseMapInfo<Entry>;
+
     llvm::BumpPtrAllocator Arena;
     llvm::UniqueStringSaver UniqueStrings; // Contents on the arena.
-    llvm::DenseMap<SymbolID, std::set<Ref>> Refs;
+    llvm::DenseSet<Entry> Entries;
   };
 
 private:
@@ -151,4 +161,31 @@ class RefSlab {
 } // namespace clangd
 } // namespace clang
 
+namespace llvm {
+template <> struct DenseMapInfo<clang::clangd::RefSlab::Builder::Entry> {
+  using Entry = clang::clangd::RefSlab::Builder::Entry;
+  static inline Entry getEmptyKey() {
+    static Entry E{clang::clangd::SymbolID(""), {}};
+    return E;
+  }
+  static inline Entry getTombstoneKey() {
+    static Entry E{clang::clangd::SymbolID("TOMBSTONE"), {}};
+    return E;
+  }
+  static unsigned getHashValue(const Entry &Val) {
+    return llvm::hash_combine(
+        Val.Symbol, reinterpret_cast<uintptr_t>(Val.Reference.Location.FileURI),
+        Val.Reference.Location.Start.rep(), Val.Reference.Location.End.rep());
+  }
+  static bool isEqual(const Entry &LHS, const Entry &RHS) {
+    return std::tie(LHS.Symbol, LHS.Reference.Location.FileURI,
+                    LHS.Reference.Kind) ==
+               std::tie(RHS.Symbol, RHS.Reference.Location.FileURI,
+                        RHS.Reference.Kind) &&
+           LHS.Reference.Location.Start == RHS.Reference.Location.Start &&
+           LHS.Reference.Location.End == RHS.Reference.Location.End;
+  }
+};
+}; // namespace llvm
+
 #endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_REF_H

diff  --git a/clang-tools-extra/clangd/index/SymbolLocation.cpp b/clang-tools-extra/clangd/index/SymbolLocation.cpp
index aac55703c35f..61da267b93ce 100644
--- a/clang-tools-extra/clangd/index/SymbolLocation.cpp
+++ b/clang-tools-extra/clangd/index/SymbolLocation.cpp
@@ -15,18 +15,14 @@ constexpr uint32_t SymbolLocation::Position::MaxLine;
 constexpr uint32_t SymbolLocation::Position::MaxColumn;
 
 void SymbolLocation::Position::setLine(uint32_t L) {
-  if (L > MaxLine) {
-    Line = MaxLine;
-    return;
-  }
-  Line = L;
+  if (L > MaxLine)
+    L = MaxLine;
+  LineColumnPacked = (L << ColumnBits) | column();
 }
 void SymbolLocation::Position::setColumn(uint32_t Col) {
-  if (Col > MaxColumn) {
-    Column = MaxColumn;
-    return;
-  }
-  Column = Col;
+  if (Col > MaxColumn)
+    Col = MaxColumn;
+  LineColumnPacked = (LineColumnPacked & ~MaxColumn) | Col;
 }
 
 llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const SymbolLocation &L) {

diff  --git a/clang-tools-extra/clangd/index/SymbolLocation.h b/clang-tools-extra/clangd/index/SymbolLocation.h
index 9b503546a9f7..2dd33839115e 100644
--- a/clang-tools-extra/clangd/index/SymbolLocation.h
+++ b/clang-tools-extra/clangd/index/SymbolLocation.h
@@ -30,22 +30,23 @@ struct SymbolLocation {
   // Position is encoded into 32 bits to save space.
   // If Line/Column overflow, the value will be their maximum value.
   struct Position {
-    Position() : Line(0), Column(0) {}
+    Position() : LineColumnPacked(0) {}
     void setLine(uint32_t Line);
-    uint32_t line() const { return Line; }
+    uint32_t line() const { return LineColumnPacked >> ColumnBits; }
     void setColumn(uint32_t Column);
-    uint32_t column() const { return Column; }
+    uint32_t column() const { return LineColumnPacked & MaxColumn; }
+    uint32_t rep() const { return LineColumnPacked; }
 
     bool hasOverflow() const {
-      return Line >= MaxLine || Column >= MaxColumn;
+      return line() == MaxLine || column() == MaxColumn;
     }
 
-    static constexpr uint32_t MaxLine = (1 << 20) - 1;
-    static constexpr uint32_t MaxColumn = (1 << 12) - 1;
+    static constexpr unsigned ColumnBits = 12;
+    static constexpr uint32_t MaxLine = (1 << (32 - ColumnBits)) - 1;
+    static constexpr uint32_t MaxColumn = (1 << ColumnBits) - 1;
 
   private:
-    uint32_t Line : 20;   // 0-based
-    uint32_t Column : 12; // 0-based
+    uint32_t LineColumnPacked; // Top 20 bit line, bottom 12 bits column.
   };
 
   /// The symbol range, using half-open range [Start, End).


        


More information about the cfe-commits mailing list