[clang-tools-extra] d19265b - [clangd] Avoid wasteful data structures in RefSlab::Builder
Sam McCall via cfe-commits
cfe-commits at lists.llvm.org
Mon May 18 13:44:54 PDT 2020
Author: Sam McCall
Date: 2020-05-18T22:34:59+02:00
New Revision: d19265b31e65c84ad908abce6e7f6e6d15c22258
URL: https://github.com/llvm/llvm-project/commit/d19265b31e65c84ad908abce6e7f6e6d15c22258
DIFF: https://github.com/llvm/llvm-project/commit/d19265b31e65c84ad908abce6e7f6e6d15c22258.diff
LOG: [clangd] Avoid wasteful data structures in RefSlab::Builder
Summary: This is worth another 10% or so on InedxBenchmark.DexBuild.
Reviewers: kbobyrev
Subscribers: ilya-biryukov, MaskRay, jkorous, mgrang, arphaman, kadircet, usaxena95, cfe-commits
Tags: #clang
Differential Revision: https://reviews.llvm.org/D79950
Added:
Modified:
clang-tools-extra/clangd/index/Ref.cpp
clang-tools-extra/clangd/index/Ref.h
clang-tools-extra/clangd/index/SymbolLocation.cpp
clang-tools-extra/clangd/index/SymbolLocation.h
Removed:
################################################################################
diff --git a/clang-tools-extra/clangd/index/Ref.cpp b/clang-tools-extra/clangd/index/Ref.cpp
index 115894ce6743..e622a3f78e42 100644
--- a/clang-tools-extra/clangd/index/Ref.cpp
+++ b/clang-tools-extra/clangd/index/Ref.cpp
@@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//
#include "Ref.h"
+#include "llvm/ADT/STLExtras.h"
namespace clang {
namespace clangd {
@@ -33,27 +34,34 @@ llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Ref &R) {
}
void RefSlab::Builder::insert(const SymbolID &ID, const Ref &S) {
- auto &M = Refs[ID];
- if (M.count(S))
- return;
- Ref R = S;
- R.Location.FileURI =
- UniqueStrings.save(R.Location.FileURI).data();
- M.insert(std::move(R));
+ Entry E = {ID, S};
+ E.Reference.Location.FileURI = UniqueStrings.save(S.Location.FileURI).data();
+ Entries.insert(std::move(E));
}
RefSlab RefSlab::Builder::build() && {
- // We can reuse the arena, as it only has unique strings and we need them all.
- // Reallocate refs on the arena to reduce waste and indirections when reading.
std::vector<std::pair<SymbolID, llvm::ArrayRef<Ref>>> Result;
- Result.reserve(Refs.size());
- size_t NumRefs = 0;
- for (auto &Sym : Refs) {
- std::vector<Ref> SymRefs(Sym.second.begin(), Sym.second.end());
- NumRefs += SymRefs.size();
- Result.emplace_back(Sym.first, llvm::ArrayRef<Ref>(SymRefs).copy(Arena));
+ // We'll reuse the arena, as it only has unique strings and we need them all.
+ // We need to group refs by symbol and form contiguous arrays on the arena.
+ std::vector<std::pair<SymbolID, const Ref *>> Flat;
+ Flat.reserve(Entries.size());
+ for (const Entry &E : Entries)
+ Flat.emplace_back(E.Symbol, &E.Reference);
+ // Group by SymbolID.
+ llvm::sort(Flat, llvm::less_first());
+ std::vector<Ref> Refs;
+ // Loop over symbols, copying refs for each onto the arena.
+ for (auto I = Flat.begin(), End = Flat.end(); I != End;) {
+ SymbolID Sym = I->first;
+ Refs.clear();
+ do {
+ Refs.push_back(*I->second);
+ ++I;
+ } while (I != End && I->first == Sym);
+ llvm::sort(Refs); // By file, affects xrefs display order.
+ Result.emplace_back(Sym, llvm::ArrayRef<Ref>(Refs).copy(Arena));
}
- return RefSlab(std::move(Result), std::move(Arena), NumRefs);
+ return RefSlab(std::move(Result), std::move(Arena), Entries.size());
}
} // namespace clangd
diff --git a/clang-tools-extra/clangd/index/Ref.h b/clang-tools-extra/clangd/index/Ref.h
index 382ade712017..cba8adaee7c2 100644
--- a/clang-tools-extra/clangd/index/Ref.h
+++ b/clang-tools-extra/clangd/index/Ref.h
@@ -13,6 +13,8 @@
#include "SymbolLocation.h"
#include "clang/Index/IndexSymbol.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Hashing.h"
+#include "llvm/Support/Allocator.h"
#include "llvm/Support/StringSaver.h"
#include "llvm/Support/raw_ostream.h"
#include <cstdint>
@@ -132,9 +134,17 @@ class RefSlab {
RefSlab build() &&;
private:
+ // A ref we're storing with its symbol to consume with build().
+ // All strings are interned, so DenseMapInfo can use pointer comparisons.
+ struct Entry {
+ SymbolID Symbol;
+ Ref Reference;
+ };
+ friend struct llvm::DenseMapInfo<Entry>;
+
llvm::BumpPtrAllocator Arena;
llvm::UniqueStringSaver UniqueStrings; // Contents on the arena.
- llvm::DenseMap<SymbolID, std::set<Ref>> Refs;
+ llvm::DenseSet<Entry> Entries;
};
private:
@@ -151,4 +161,31 @@ class RefSlab {
} // namespace clangd
} // namespace clang
+namespace llvm {
+template <> struct DenseMapInfo<clang::clangd::RefSlab::Builder::Entry> {
+ using Entry = clang::clangd::RefSlab::Builder::Entry;
+ static inline Entry getEmptyKey() {
+ static Entry E{clang::clangd::SymbolID(""), {}};
+ return E;
+ }
+ static inline Entry getTombstoneKey() {
+ static Entry E{clang::clangd::SymbolID("TOMBSTONE"), {}};
+ return E;
+ }
+ static unsigned getHashValue(const Entry &Val) {
+ return llvm::hash_combine(
+ Val.Symbol, reinterpret_cast<uintptr_t>(Val.Reference.Location.FileURI),
+ Val.Reference.Location.Start.rep(), Val.Reference.Location.End.rep());
+ }
+ static bool isEqual(const Entry &LHS, const Entry &RHS) {
+ return std::tie(LHS.Symbol, LHS.Reference.Location.FileURI,
+ LHS.Reference.Kind) ==
+ std::tie(RHS.Symbol, RHS.Reference.Location.FileURI,
+ RHS.Reference.Kind) &&
+ LHS.Reference.Location.Start == RHS.Reference.Location.Start &&
+ LHS.Reference.Location.End == RHS.Reference.Location.End;
+ }
+};
+}; // namespace llvm
+
#endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_REF_H
diff --git a/clang-tools-extra/clangd/index/SymbolLocation.cpp b/clang-tools-extra/clangd/index/SymbolLocation.cpp
index aac55703c35f..61da267b93ce 100644
--- a/clang-tools-extra/clangd/index/SymbolLocation.cpp
+++ b/clang-tools-extra/clangd/index/SymbolLocation.cpp
@@ -15,18 +15,14 @@ constexpr uint32_t SymbolLocation::Position::MaxLine;
constexpr uint32_t SymbolLocation::Position::MaxColumn;
void SymbolLocation::Position::setLine(uint32_t L) {
- if (L > MaxLine) {
- Line = MaxLine;
- return;
- }
- Line = L;
+ if (L > MaxLine)
+ L = MaxLine;
+ LineColumnPacked = (L << ColumnBits) | column();
}
void SymbolLocation::Position::setColumn(uint32_t Col) {
- if (Col > MaxColumn) {
- Column = MaxColumn;
- return;
- }
- Column = Col;
+ if (Col > MaxColumn)
+ Col = MaxColumn;
+ LineColumnPacked = (LineColumnPacked & ~MaxColumn) | Col;
}
llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const SymbolLocation &L) {
diff --git a/clang-tools-extra/clangd/index/SymbolLocation.h b/clang-tools-extra/clangd/index/SymbolLocation.h
index 9b503546a9f7..2dd33839115e 100644
--- a/clang-tools-extra/clangd/index/SymbolLocation.h
+++ b/clang-tools-extra/clangd/index/SymbolLocation.h
@@ -30,22 +30,23 @@ struct SymbolLocation {
// Position is encoded into 32 bits to save space.
// If Line/Column overflow, the value will be their maximum value.
struct Position {
- Position() : Line(0), Column(0) {}
+ Position() : LineColumnPacked(0) {}
void setLine(uint32_t Line);
- uint32_t line() const { return Line; }
+ uint32_t line() const { return LineColumnPacked >> ColumnBits; }
void setColumn(uint32_t Column);
- uint32_t column() const { return Column; }
+ uint32_t column() const { return LineColumnPacked & MaxColumn; }
+ uint32_t rep() const { return LineColumnPacked; }
bool hasOverflow() const {
- return Line >= MaxLine || Column >= MaxColumn;
+ return line() == MaxLine || column() == MaxColumn;
}
- static constexpr uint32_t MaxLine = (1 << 20) - 1;
- static constexpr uint32_t MaxColumn = (1 << 12) - 1;
+ static constexpr unsigned ColumnBits = 12;
+ static constexpr uint32_t MaxLine = (1 << (32 - ColumnBits)) - 1;
+ static constexpr uint32_t MaxColumn = (1 << ColumnBits) - 1;
private:
- uint32_t Line : 20; // 0-based
- uint32_t Column : 12; // 0-based
+ uint32_t LineColumnPacked; // Top 20 bit line, bottom 12 bits column.
};
/// The symbol range, using half-open range [Start, End).
More information about the cfe-commits
mailing list