[clang-tools-extra] r341375 - [clangd] Define a compact binary serialization fomat for symbol slab/index.
Sam McCall via cfe-commits
cfe-commits at lists.llvm.org
Tue Sep 4 09:16:51 PDT 2018
Author: sammccall
Date: Tue Sep 4 09:16:50 2018
New Revision: 341375
URL: http://llvm.org/viewvc/llvm-project?rev=341375&view=rev
Log:
[clangd] Define a compact binary serialization fomat for symbol slab/index.
Summary:
This is intended to replace the current YAML format for general use.
It's ~10x more compact than YAML, and ~40% more compact than gzipped YAML:
llvmidx.riff = 20M, llvmidx.yaml = 272M, llvmidx.yaml.gz = 32M
It's also simpler/faster to read and write.
The format is a RIFF container (chunks of (type, size, data)) with:
- a compressed string table
- simple binary encoding of symbols (with varints for compactness)
It can be extended to include occurrences, Dex posting lists, etc.
There's no rich backwards-compatibility scheme, but a version number is included
so we can detect incompatible files and do ad-hoc back-compat.
Alternatives considered:
- compressed YAML or JSON: bulky and slow to load
- llvm bitstream: confusing model and libraries are hard to use. My attempt
produced slightly larger files, and the code was longer and slower.
- protobuf or similar: would be really nice (esp for back-compat) but the
dependency is a big hassle
- ad-hoc binary format without a container: it seems clear we're going
to add posting lists and occurrences here, and that they will benefit
from sharing a string table. The container makes it easy to debug
these pieces in isolation, and make them optional.
Reviewers: ioeric
Subscribers: mgorny, ilya-biryukov, MaskRay, jkorous, mgrang, arphaman, kadircet, cfe-commits
Differential Revision: https://reviews.llvm.org/D51585
Added:
clang-tools-extra/trunk/clangd/RIFF.cpp
clang-tools-extra/trunk/clangd/RIFF.h
clang-tools-extra/trunk/clangd/index/Serialization.cpp
clang-tools-extra/trunk/clangd/index/Serialization.h
clang-tools-extra/trunk/unittests/clangd/RIFFTests.cpp
clang-tools-extra/trunk/unittests/clangd/SerializationTests.cpp
Modified:
clang-tools-extra/trunk/clangd/CMakeLists.txt
clang-tools-extra/trunk/clangd/global-symbol-builder/GlobalSymbolBuilderMain.cpp
clang-tools-extra/trunk/clangd/index/Index.cpp
clang-tools-extra/trunk/clangd/index/Index.h
clang-tools-extra/trunk/clangd/index/SymbolYAML.cpp
clang-tools-extra/trunk/clangd/tool/ClangdMain.cpp
clang-tools-extra/trunk/unittests/clangd/CMakeLists.txt
clang-tools-extra/trunk/unittests/clangd/SymbolCollectorTests.cpp
Modified: clang-tools-extra/trunk/clangd/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/CMakeLists.txt?rev=341375&r1=341374&r2=341375&view=diff
==============================================================================
--- clang-tools-extra/trunk/clangd/CMakeLists.txt (original)
+++ clang-tools-extra/trunk/clangd/CMakeLists.txt Tue Sep 4 09:16:50 2018
@@ -29,6 +29,7 @@ add_clang_library(clangDaemon
Protocol.cpp
ProtocolHandlers.cpp
Quality.cpp
+ RIFF.cpp
SourceCode.cpp
Threading.cpp
Trace.cpp
@@ -41,6 +42,7 @@ add_clang_library(clangDaemon
index/Index.cpp
index/MemIndex.cpp
index/Merge.cpp
+ index/Serialization.cpp
index/SymbolCollector.cpp
index/SymbolYAML.cpp
Added: clang-tools-extra/trunk/clangd/RIFF.cpp
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/RIFF.cpp?rev=341375&view=auto
==============================================================================
--- clang-tools-extra/trunk/clangd/RIFF.cpp (added)
+++ clang-tools-extra/trunk/clangd/RIFF.cpp Tue Sep 4 09:16:50 2018
@@ -0,0 +1,88 @@
+//===--- RIFF.cpp - Binary container file format --------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "RIFF.h"
+#include "llvm/Support/Endian.h"
+
+using namespace llvm;
+namespace clang {
+namespace clangd {
+namespace riff {
+
+static Error makeError(const char *Msg) {
+ return createStringError(inconvertibleErrorCode(), Msg);
+}
+
+Expected<Chunk> readChunk(StringRef &Stream) {
+ if (Stream.size() < 8)
+ return makeError("incomplete chunk header");
+ Chunk C;
+ std::copy(Stream.begin(), Stream.begin() + 4, C.ID.begin());
+ Stream = Stream.drop_front(4);
+ uint32_t Len = support::endian::read32le(Stream.take_front(4).begin());
+ Stream = Stream.drop_front(4);
+ if (Stream.size() < Len)
+ return makeError("truncated chunk");
+ C.Data = Stream.take_front(Len);
+ Stream = Stream.drop_front(Len);
+ if (Len % 2 & !Stream.empty()) { // Skip padding byte.
+ if (Stream.front())
+ return makeError("nonzero padding byte");
+ Stream = Stream.drop_front();
+ }
+ return C;
+};
+
+raw_ostream &operator<<(raw_ostream &OS, const Chunk &C) {
+ OS.write(C.ID.begin(), C.ID.size());
+ char Size[4];
+ llvm::support::endian::write32le(Size, C.Data.size());
+ OS.write(Size, sizeof(Size));
+ OS << C.Data;
+ if (C.Data.size() % 2)
+ OS.write(0);
+ return OS;
+}
+
+llvm::Expected<File> readFile(llvm::StringRef Stream) {
+ auto RIFF = readChunk(Stream);
+ if (!RIFF)
+ return RIFF.takeError();
+ if (RIFF->ID != fourCC("RIFF"))
+ return makeError("not a RIFF container");
+ if (RIFF->Data.size() < 4)
+ return makeError("RIFF chunk too short");
+ File F;
+ std::copy(RIFF->Data.begin(), RIFF->Data.begin() + 4, F.Type.begin());
+ for (llvm::StringRef Body = RIFF->Data.drop_front(4); !Body.empty();)
+ if (auto Chunk = readChunk(Body)) {
+ F.Chunks.push_back(*Chunk);
+ } else
+ return Chunk.takeError();
+ return F;
+}
+
+raw_ostream &operator<<(raw_ostream &OS, const File &F) {
+ // To avoid copies, we serialize the outer RIFF chunk "by hand".
+ size_t DataLen = 4; // Predict length of RIFF chunk data.
+ for (const auto &C : F.Chunks)
+ DataLen += 4 + 4 + C.Data.size() + (C.Data.size() % 2);
+ OS << "RIFF";
+ char Size[4];
+ llvm::support::endian::write32le(Size, DataLen);
+ OS.write(Size, sizeof(Size));
+ OS.write(F.Type.begin(), F.Type.size());
+ for (const auto &C : F.Chunks)
+ OS << C;
+ return OS;
+}
+
+} // namespace riff
+} // namespace clangd
+} // namespace clang
Added: clang-tools-extra/trunk/clangd/RIFF.h
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/RIFF.h?rev=341375&view=auto
==============================================================================
--- clang-tools-extra/trunk/clangd/RIFF.h (added)
+++ clang-tools-extra/trunk/clangd/RIFF.h Tue Sep 4 09:16:50 2018
@@ -0,0 +1,81 @@
+//===--- RIFF.h - Binary container file format -------------------*- C++-*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Tools for reading and writing data in RIFF containers.
+//
+// A chunk consists of:
+// - ID : char[4]
+// - Length : uint32
+// - Data : byte[Length]
+// - Padding : byte[Length % 2]
+// The semantics of a chunk's Data are determined by its ID.
+// The format makes it easy to skip over uninteresting or unknown chunks.
+//
+// A RIFF file is a single chunk with ID "RIFF". Its Data is:
+// - Type : char[4]
+// - Chunks : chunk[]
+//
+// This means that a RIFF file consists of:
+// - "RIFF" : char[4]
+// - File length - 8 : uint32
+// - File type : char[4]
+// - Chunks : chunk[]
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_RIFF_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_RIFF_H
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/ScopedPrinter.h"
+#include <array>
+
+namespace clang {
+namespace clangd {
+namespace riff {
+
+// A FourCC identifies a chunk in a file, or the type of file itself.
+using FourCC = std::array<char, 4>;
+// Get a FourCC from a string literal, e.g. fourCC("RIFF").
+inline constexpr FourCC fourCC(const char (&Literal)[5]) {
+ return FourCC{{Literal[0], Literal[1], Literal[2], Literal[3]}};
+}
+// A chunk is a section in a RIFF container.
+struct Chunk {
+ FourCC ID;
+ llvm::StringRef Data;
+};
+inline bool operator==(const Chunk &L, const Chunk &R) {
+ return std::tie(L.ID, L.Data) == std::tie(R.ID, R.Data);
+}
+// A File is a RIFF container, which is a typed chunk sequence.
+struct File {
+ FourCC Type;
+ std::vector<Chunk> Chunks;
+};
+inline bool operator==(const File &L, const File &R) {
+ return std::tie(L.Type, L.Chunks) == std::tie(R.Type, R.Chunks);
+}
+
+// Reads a single chunk from the start of Stream.
+// Stream is updated to exclude the consumed chunk.
+llvm::Expected<Chunk> readChunk(llvm::StringRef &Stream);
+
+// Serialize a single chunk to OS.
+llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Chunk &);
+
+// Parses a RIFF file consisting of a single RIFF chunk.
+llvm::Expected<File> readFile(llvm::StringRef Stream);
+
+// Serialize a RIFF file (i.e. a single RIFF chunk) to OS.
+llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const File &);
+
+} // namespace riff
+} // namespace clangd
+} // namespace clang
+#endif
Modified: clang-tools-extra/trunk/clangd/global-symbol-builder/GlobalSymbolBuilderMain.cpp
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/global-symbol-builder/GlobalSymbolBuilderMain.cpp?rev=341375&r1=341374&r2=341375&view=diff
==============================================================================
--- clang-tools-extra/trunk/clangd/global-symbol-builder/GlobalSymbolBuilderMain.cpp (original)
+++ clang-tools-extra/trunk/clangd/global-symbol-builder/GlobalSymbolBuilderMain.cpp Tue Sep 4 09:16:50 2018
@@ -7,15 +7,16 @@
//
//===----------------------------------------------------------------------===//
//
-// GlobalSymbolBuilder is a tool to generate YAML-format symbols across the
-// whole project. This tools is for **experimental** only. Don't use it in
-// production code.
+// GlobalSymbolBuilder is a tool to extract symbols from a whole project.
+// This tool is **experimental** only. Don't use it in production code.
//
//===----------------------------------------------------------------------===//
+#include "RIFF.h"
#include "index/CanonicalIncludes.h"
#include "index/Index.h"
#include "index/Merge.h"
+#include "index/Serialization.h"
#include "index/SymbolCollector.h"
#include "index/SymbolYAML.h"
#include "clang/Frontend/CompilerInstance.h"
@@ -59,6 +60,14 @@ static llvm::cl::opt<bool> MergeOnTheFly
"MapReduce."),
llvm::cl::init(true), llvm::cl::Hidden);
+enum class Format { YAML, Binary };
+static llvm::cl::opt<Format>
+ Format("format", llvm::cl::desc("Format of the index to be written"),
+ llvm::cl::values(
+ clEnumValN(Format::YAML, "yaml", "human-readable YAML format"),
+ clEnumValN(Format::Binary, "binary", "binary RIFF format")),
+ llvm::cl::init(Format::YAML));
+
/// Responsible for aggregating symbols from each processed file and producing
/// the final results. All methods in this class must be thread-safe,
/// 'consumeSymbols' may be called from multiple threads.
@@ -210,8 +219,8 @@ int main(int argc, const char **argv) {
llvm::sys::PrintStackTraceOnErrorSignal(argv[0]);
const char *Overview = R"(
- This is an **experimental** tool to generate YAML-format project-wide symbols
- for clangd (global code completion). It would be changed and deprecated
+ This is an **experimental** tool to extract symbols from a whole project
+ for clangd (global code completion). It will be changed and deprecated
eventually. Don't use it in production code!
Example usage for building index for the whole project using CMake compile
@@ -262,7 +271,16 @@ int main(int argc, const char **argv) {
}
// Reduce phase: combine symbols with the same IDs.
auto UniqueSymbols = Consumer->mergeResults();
- // Output phase: emit YAML for result symbols.
- SymbolsToYAML(UniqueSymbols, llvm::outs());
+ // Output phase: emit result symbols.
+ switch (clang::clangd::Format) {
+ case clang::clangd::Format::YAML:
+ SymbolsToYAML(UniqueSymbols, llvm::outs());
+ break;
+ case clang::clangd::Format::Binary: {
+ clang::clangd::IndexFileOut Out;
+ Out.Symbols = &UniqueSymbols;
+ llvm::outs() << Out;
+ }
+ }
return 0;
}
Modified: clang-tools-extra/trunk/clangd/index/Index.cpp
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/index/Index.cpp?rev=341375&r1=341374&r2=341375&view=diff
==============================================================================
--- clang-tools-extra/trunk/clangd/index/Index.cpp (original)
+++ clang-tools-extra/trunk/clangd/index/Index.cpp Tue Sep 4 09:16:50 2018
@@ -10,6 +10,7 @@
#include "Index.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Error.h"
#include "llvm/Support/SHA1.h"
#include "llvm/Support/raw_ostream.h"
@@ -28,21 +29,20 @@ SymbolID::SymbolID(StringRef USR)
: HashValue(SHA1::hash(arrayRefFromStringRef(USR))) {}
raw_ostream &operator<<(raw_ostream &OS, const SymbolID &ID) {
- OS << toHex(toStringRef(ID.HashValue));
- return OS;
+ return OS << toHex(ID.raw());
}
-std::string SymbolID::str() const {
- std::string ID;
- llvm::raw_string_ostream OS(ID);
- OS << *this;
- return OS.str();
+SymbolID SymbolID::fromRaw(llvm::StringRef Raw) {
+ SymbolID ID;
+ assert(Raw.size() == RawSize);
+ memcpy(ID.HashValue.data(), Raw.data(), RawSize);
+ return ID;
}
+std::string SymbolID::str() const { return toHex(raw()); }
+
void operator>>(StringRef Str, SymbolID &ID) {
- std::string HexString = fromHex(Str);
- assert(HexString.size() == ID.HashValue.size());
- std::copy(HexString.begin(), HexString.end(), ID.HashValue.begin());
+ ID = SymbolID::fromRaw(fromHex(Str));
}
raw_ostream &operator<<(raw_ostream &OS, SymbolOrigin O) {
@@ -78,34 +78,18 @@ SymbolSlab::const_iterator SymbolSlab::f
}
// Copy the underlying data of the symbol into the owned arena.
-static void own(Symbol &S, llvm::UniqueStringSaver &Strings,
- BumpPtrAllocator &Arena) {
- // Intern replaces V with a reference to the same string owned by the arena.
- auto Intern = [&](StringRef &V) { V = Strings.save(V); };
-
- // We need to copy every StringRef field onto the arena.
- Intern(S.Name);
- Intern(S.Scope);
- Intern(S.CanonicalDeclaration.FileURI);
- Intern(S.Definition.FileURI);
-
- Intern(S.Signature);
- Intern(S.CompletionSnippetSuffix);
-
- Intern(S.Documentation);
- Intern(S.ReturnType);
- for (auto &I : S.IncludeHeaders)
- Intern(I.IncludeHeader);
+static void own(Symbol &S, llvm::UniqueStringSaver &Strings) {
+ visitStrings(S, [&](StringRef &V) { V = Strings.save(V); });
}
void SymbolSlab::Builder::insert(const Symbol &S) {
auto R = SymbolIndex.try_emplace(S.ID, Symbols.size());
if (R.second) {
Symbols.push_back(S);
- own(Symbols.back(), UniqueStrings, Arena);
+ own(Symbols.back(), UniqueStrings);
} else {
auto &Copy = Symbols[R.first->second] = S;
- own(Copy, UniqueStrings, Arena);
+ own(Copy, UniqueStrings);
}
}
@@ -118,7 +102,7 @@ SymbolSlab SymbolSlab::Builder::build()
BumpPtrAllocator NewArena;
llvm::UniqueStringSaver Strings(NewArena);
for (auto &S : Symbols)
- own(S, Strings, NewArena);
+ own(S, Strings);
return SymbolSlab(std::move(NewArena), std::move(Symbols));
}
Modified: clang-tools-extra/trunk/clangd/index/Index.h
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/index/Index.h?rev=341375&r1=341374&r2=341375&view=diff
==============================================================================
--- clang-tools-extra/trunk/clangd/index/Index.h (original)
+++ clang-tools-extra/trunk/clangd/index/Index.h Tue Sep 4 09:16:50 2018
@@ -84,26 +84,28 @@ public:
return HashValue < Sym.HashValue;
}
+ constexpr static size_t RawSize = 20;
+ llvm::StringRef raw() const {
+ return StringRef(reinterpret_cast<const char *>(HashValue.data()), RawSize);
+ }
+ static SymbolID fromRaw(llvm::StringRef);
// Returns a 40-bytes hex encoded string.
std::string str() const;
private:
- static constexpr unsigned HashByteLength = 20;
-
- friend llvm::hash_code hash_value(const SymbolID &ID) {
- // We already have a good hash, just return the first bytes.
- static_assert(sizeof(size_t) <= HashByteLength, "size_t longer than SHA1!");
- size_t Result;
- memcpy(&Result, ID.HashValue.data(), sizeof(size_t));
- return llvm::hash_code(Result);
- }
- friend llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
- const SymbolID &ID);
friend void operator>>(llvm::StringRef Str, SymbolID &ID);
- std::array<uint8_t, HashByteLength> HashValue;
+ std::array<uint8_t, RawSize> HashValue;
};
+inline llvm::hash_code hash_value(const SymbolID &ID) {
+ // We already have a good hash, just return the first bytes.
+ assert(sizeof(size_t) <= SymbolID::RawSize && "size_t longer than SHA1!");
+ size_t Result;
+ memcpy(&Result, ID.raw().data(), sizeof(size_t));
+ return llvm::hash_code(Result);
+}
+
// Write SymbolID into the given stream. SymbolID is encoded as a 40-bytes
// hex string.
llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const SymbolID &ID);
@@ -246,6 +248,21 @@ struct Symbol {
};
llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Symbol &S);
+// Invokes Callback with each StringRef& contained in the Symbol.
+// Useful for deduplicating backing strings.
+template <typename Callback> void visitStrings(Symbol &S, const Callback &CB) {
+ CB(S.Name);
+ CB(S.Scope);
+ CB(S.CanonicalDeclaration.FileURI);
+ CB(S.Definition.FileURI);
+ CB(S.Signature);
+ CB(S.CompletionSnippetSuffix);
+ CB(S.Documentation);
+ CB(S.ReturnType);
+ for (auto &Include : S.IncludeHeaders)
+ CB(Include.IncludeHeader);
+}
+
// Computes query-independent quality score for a Symbol.
// This currently falls in the range [1, ln(#indexed documents)].
// FIXME: this should probably be split into symbol -> signals
Added: clang-tools-extra/trunk/clangd/index/Serialization.cpp
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/index/Serialization.cpp?rev=341375&view=auto
==============================================================================
--- clang-tools-extra/trunk/clangd/index/Serialization.cpp (added)
+++ clang-tools-extra/trunk/clangd/index/Serialization.cpp Tue Sep 4 09:16:50 2018
@@ -0,0 +1,366 @@
+//===-- Serialization.cpp - Binary serialization of index data ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+#include "Serialization.h"
+#include "../RIFF.h"
+#include "llvm/Support/Compression.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/Error.h"
+
+using namespace llvm;
+namespace clang {
+namespace clangd {
+namespace {
+Error makeError(const Twine &Msg) {
+ return make_error<StringError>(Msg, inconvertibleErrorCode());
+}
+
+// IO PRIMITIVES
+// We use little-endian 32 bit ints, sometimes with variable-length encoding.
+
+StringRef consume(StringRef &Data, int N) {
+ StringRef Ret = Data.take_front(N);
+ Data = Data.drop_front(N);
+ return Ret;
+}
+
+uint8_t consume8(StringRef &Data) {
+ uint8_t Ret = Data.front();
+ Data = Data.drop_front();
+ return Ret;
+}
+
+uint32_t consume32(StringRef &Data) {
+ auto Ret = support::endian::read32le(Data.bytes_begin());
+ Data = Data.drop_front(4);
+ return Ret;
+}
+
+void write32(uint32_t I, raw_ostream &OS) {
+ char buf[4];
+ support::endian::write32le(buf, I);
+ OS.write(buf, sizeof(buf));
+}
+
+// Variable-length int encoding (varint) uses the bottom 7 bits of each byte
+// to encode the number, and the top bit to indicate whether more bytes follow.
+// e.g. 9a 2f means [0x1a and keep reading, 0x2f and stop].
+// This represents 0x1a | 0x2f<<7 = 6042.
+// A 32-bit integer takes 1-5 bytes to encode; small numbers are more compact.
+void writeVar(uint32_t I, raw_ostream &OS) {
+ constexpr static uint8_t More = 1 << 7;
+ if (LLVM_LIKELY(I < 1 << 7)) {
+ OS.write(I);
+ return;
+ }
+ for (;;) {
+ OS.write(I | More);
+ I >>= 7;
+ if (I < 1 << 7) {
+ OS.write(I);
+ return;
+ }
+ }
+}
+
+uint32_t consumeVar(StringRef &Data) {
+ constexpr static uint8_t More = 1 << 7;
+ uint8_t B = consume8(Data);
+ if (LLVM_LIKELY(!(B & More)))
+ return B;
+ uint32_t Val = B & ~More;
+ for (int Shift = 7; B & More && Shift < 32; Shift += 7) {
+ B = consume8(Data);
+ Val |= (B & ~More) << Shift;
+ }
+ return Val;
+}
+
+// STRING TABLE ENCODING
+// Index data has many string fields, and many strings are identical.
+// We store each string once, and refer to them by index.
+//
+// The string table's format is:
+// - UncompressedSize : uint32
+// - CompressedData : byte[CompressedSize]
+//
+// CompressedData is a zlib-compressed byte[UncompressedSize].
+// It contains a sequence of null-terminated strings, e.g. "foo\0bar\0".
+// These are sorted to improve compression.
+
+// Maps each string to a canonical representation.
+// Strings remain owned externally (e.g. by SymbolSlab).
+class StringTableOut {
+ DenseSet<StringRef> Unique;
+ std::vector<StringRef> Sorted;
+ // Since strings are interned, look up can be by pointer.
+ DenseMap<std::pair<const char *, size_t>, unsigned> Index;
+
+public:
+ // Add a string to the table. Overwrites S if an identical string exists.
+ void intern(StringRef &S) { S = *Unique.insert(S).first; };
+ // Finalize the table and write it to OS. No more strings may be added.
+ void finalize(raw_ostream &OS) {
+ Sorted = {Unique.begin(), Unique.end()};
+ std::sort(Sorted.begin(), Sorted.end());
+ for (unsigned I = 0; I < Sorted.size(); ++I)
+ Index.try_emplace({Sorted[I].data(), Sorted[I].size()}, I);
+
+ std::string RawTable;
+ for (StringRef S : Sorted) {
+ RawTable.append(S);
+ RawTable.push_back(0);
+ }
+ SmallString<1> Compressed;
+ cantFail(zlib::compress(RawTable, Compressed));
+ write32(RawTable.size(), OS);
+ OS << Compressed;
+ }
+ // Get the ID of an string, which must be interned. Table must be finalized.
+ unsigned index(StringRef S) const {
+ assert(!Sorted.empty() && "table not finalized");
+ assert(Index.count({S.data(), S.size()}) && "string not interned");
+ return Index.find({S.data(), S.size()})->second;
+ }
+};
+
+struct StringTableIn {
+ BumpPtrAllocator Arena;
+ std::vector<StringRef> Strings;
+};
+
+Expected<StringTableIn> readStringTable(StringRef Data) {
+ if (Data.size() < 4)
+ return makeError("Bad string table: not enough metadata");
+ size_t UncompressedSize = consume32(Data);
+ SmallString<1> Uncompressed;
+ if (Error E = llvm::zlib::uncompress(Data, Uncompressed, UncompressedSize))
+ return std::move(E);
+
+ StringTableIn Table;
+ StringSaver Saver(Table.Arena);
+ for (StringRef Rest = Uncompressed; !Rest.empty();) {
+ auto Len = Rest.find(0);
+ if (Len == StringRef::npos)
+ return makeError("Bad string table: not null terminated");
+ Table.Strings.push_back(Saver.save(consume(Rest, Len)));
+ Rest = Rest.drop_front();
+ }
+ return Table;
+}
+
+// SYMBOL ENCODING
+// Each field of clangd::Symbol is encoded in turn (see implementation).
+// - StringRef fields encode as varint (index into the string table)
+// - enums encode as the underlying type
+// - most numbers encode as varint
+
+// It's useful to the implementation to assume symbols have a bounded size.
+constexpr size_t SymbolSizeBound = 512;
+// To ensure the bounded size, restrict the number of include headers stored.
+constexpr unsigned MaxIncludes = 50;
+
+void writeSymbol(const Symbol &Sym, const StringTableOut &Strings,
+ raw_ostream &OS) {
+ auto StartOffset = OS.tell();
+ OS << Sym.ID.raw(); // TODO: once we start writing xrefs and posting lists,
+ // symbol IDs should probably be in a string table.
+ OS.write(static_cast<uint8_t>(Sym.SymInfo.Kind));
+ OS.write(static_cast<uint8_t>(Sym.SymInfo.Lang));
+ writeVar(Strings.index(Sym.Name), OS);
+ writeVar(Strings.index(Sym.Scope), OS);
+ for (const auto &Loc : {Sym.Definition, Sym.CanonicalDeclaration}) {
+ writeVar(Strings.index(Loc.FileURI), OS);
+ for (const auto &Endpoint : {Loc.Start, Loc.End}) {
+ writeVar(Endpoint.Line, OS);
+ writeVar(Endpoint.Column, OS);
+ }
+ }
+ writeVar(Sym.References, OS);
+ OS.write(Sym.IsIndexedForCodeCompletion);
+ OS.write(static_cast<uint8_t>(Sym.Origin));
+ writeVar(Strings.index(Sym.Signature), OS);
+ writeVar(Strings.index(Sym.CompletionSnippetSuffix), OS);
+ writeVar(Strings.index(Sym.Documentation), OS);
+ writeVar(Strings.index(Sym.ReturnType), OS);
+
+ auto WriteInclude = [&](const Symbol::IncludeHeaderWithReferences &Include) {
+ writeVar(Strings.index(Include.IncludeHeader), OS);
+ writeVar(Include.References, OS);
+ };
+ // There are almost certainly few includes, so we can just write them.
+ if (LLVM_LIKELY(Sym.IncludeHeaders.size() <= MaxIncludes)) {
+ writeVar(Sym.IncludeHeaders.size(), OS);
+ for (const auto &Include : Sym.IncludeHeaders)
+ WriteInclude(Include);
+ } else {
+ // If there are too many, make sure we truncate the least important.
+ using Pointer = const Symbol::IncludeHeaderWithReferences *;
+ std::vector<Pointer> Pointers;
+ for (const auto &Include : Sym.IncludeHeaders)
+ Pointers.push_back(&Include);
+ std::sort(Pointers.begin(), Pointers.end(), [](Pointer L, Pointer R) {
+ return L->References > R->References;
+ });
+ Pointers.resize(MaxIncludes);
+
+ writeVar(MaxIncludes, OS);
+ for (Pointer P : Pointers)
+ WriteInclude(*P);
+ }
+
+ assert(OS.tell() - StartOffset < SymbolSizeBound && "Symbol length unsafe!");
+ (void)StartOffset; // Unused in NDEBUG;
+}
+
+Expected<Symbol> readSymbol(StringRef &Data, const StringTableIn &Strings) {
+ // Usually we can skip bounds checks because the buffer is huge.
+ // Near the end of the buffer, this would be unsafe. In this rare case, copy
+ // the data into a bigger buffer so we can again skip the checks.
+ if (LLVM_UNLIKELY(Data.size() < SymbolSizeBound)) {
+ std::string Buf(Data);
+ Buf.resize(SymbolSizeBound);
+ StringRef ExtendedData = Buf;
+ auto Ret = readSymbol(ExtendedData, Strings);
+ unsigned BytesRead = Buf.size() - ExtendedData.size();
+ if (BytesRead > Data.size())
+ return makeError("read past end of data");
+ Data = Data.drop_front(BytesRead);
+ return Ret;
+ }
+
+#define READ_STRING(Field) \
+ do { \
+ auto StringIndex = consumeVar(Data); \
+ if (LLVM_UNLIKELY(StringIndex >= Strings.Strings.size())) \
+ return makeError("Bad string index"); \
+ Field = Strings.Strings[StringIndex]; \
+ } while (0)
+
+ Symbol Sym;
+ Sym.ID = SymbolID::fromRaw(consume(Data, 20));
+ Sym.SymInfo.Kind = static_cast<index::SymbolKind>(consume8(Data));
+ Sym.SymInfo.Lang = static_cast<index::SymbolLanguage>(consume8(Data));
+ READ_STRING(Sym.Name);
+ READ_STRING(Sym.Scope);
+ for (SymbolLocation *Loc : {&Sym.Definition, &Sym.CanonicalDeclaration}) {
+ READ_STRING(Loc->FileURI);
+ for (auto &Endpoint : {&Loc->Start, &Loc->End}) {
+ Endpoint->Line = consumeVar(Data);
+ Endpoint->Column = consumeVar(Data);
+ }
+ }
+ Sym.References = consumeVar(Data);
+ Sym.IsIndexedForCodeCompletion = consume8(Data);
+ Sym.Origin = static_cast<SymbolOrigin>(consume8(Data));
+ READ_STRING(Sym.Signature);
+ READ_STRING(Sym.CompletionSnippetSuffix);
+ READ_STRING(Sym.Documentation);
+ READ_STRING(Sym.ReturnType);
+ unsigned IncludeHeaderN = consumeVar(Data);
+ if (IncludeHeaderN > MaxIncludes)
+ return makeError("too many IncludeHeaders");
+ Sym.IncludeHeaders.resize(IncludeHeaderN);
+ for (auto &I : Sym.IncludeHeaders) {
+ READ_STRING(I.IncludeHeader);
+ I.References = consumeVar(Data);
+ }
+
+#undef READ_STRING
+ return Sym;
+}
+
+} // namespace
+
+// FILE ENCODING
+// A file is a RIFF chunk with type 'CdIx'.
+// It contains the sections:
+// - meta: version number
+// - stri: string table
+// - symb: symbols
+
+// The current versioning scheme is simple - non-current versions are rejected.
+// This allows arbitrary format changes, which invalidate stored data.
+// Later we may want to support some backward compatibility.
+constexpr static uint32_t Version = 1;
+
+Expected<IndexFileIn> readIndexFile(StringRef Data) {
+ auto RIFF = riff::readFile(Data);
+ if (!RIFF)
+ return RIFF.takeError();
+ if (RIFF->Type != riff::fourCC("CdIx"))
+ return makeError("wrong RIFF type");
+ StringMap<StringRef> Chunks;
+ for (const auto &Chunk : RIFF->Chunks)
+ Chunks.try_emplace(StringRef(Chunk.ID.data(), Chunk.ID.size()), Chunk.Data);
+
+ for (StringRef RequiredChunk : {"meta", "stri"})
+ if (!Chunks.count(RequiredChunk))
+ return makeError("missing required chunk " + RequiredChunk);
+
+ StringRef Meta = Chunks.lookup("meta");
+ if (Meta.size() < 4 || consume32(Meta) != Version)
+ return makeError("wrong version");
+
+ auto Strings = readStringTable(Chunks.lookup("stri"));
+ if (!Strings)
+ return Strings.takeError();
+
+ IndexFileIn Result;
+ if (Chunks.count("symb")) {
+ StringRef SymbolData = Chunks.lookup("symb");
+ SymbolSlab::Builder Symbols;
+ while (!SymbolData.empty())
+ if (auto Sym = readSymbol(SymbolData, *Strings))
+ Symbols.insert(*Sym);
+ else
+ return Sym.takeError();
+ Result.Symbols = std::move(Symbols).build();
+ }
+ return Result;
+}
+
+raw_ostream &operator<<(raw_ostream &OS, const IndexFileOut &Data) {
+ assert(Data.Symbols && "An index file without symbols makes no sense!");
+ riff::File RIFF;
+ RIFF.Type = riff::fourCC("CdIx");
+
+ SmallString<4> Meta;
+ {
+ raw_svector_ostream MetaOS(Meta);
+ write32(Version, MetaOS);
+ }
+ RIFF.Chunks.push_back({riff::fourCC("meta"), Meta});
+
+ StringTableOut Strings;
+ std::vector<Symbol> Symbols;
+ for (const auto &Sym : *Data.Symbols) {
+ Symbols.emplace_back(Sym);
+ visitStrings(Symbols.back(), [&](StringRef &S) { Strings.intern(S); });
+ }
+
+ std::string StringSection;
+ {
+ raw_string_ostream StringOS(StringSection);
+ Strings.finalize(StringOS);
+ }
+ RIFF.Chunks.push_back({riff::fourCC("stri"), StringSection});
+
+ std::string SymbolSection;
+ {
+ raw_string_ostream SymbolOS(SymbolSection);
+ for (const auto &Sym : Symbols)
+ writeSymbol(Sym, Strings, SymbolOS);
+ }
+ RIFF.Chunks.push_back({riff::fourCC("symb"), SymbolSection});
+
+ return OS << RIFF;
+}
+
+} // namespace clangd
+} // namespace clang
Added: clang-tools-extra/trunk/clangd/index/Serialization.h
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/index/Serialization.h?rev=341375&view=auto
==============================================================================
--- clang-tools-extra/trunk/clangd/index/Serialization.h (added)
+++ clang-tools-extra/trunk/clangd/index/Serialization.h Tue Sep 4 09:16:50 2018
@@ -0,0 +1,48 @@
+//===--- Serialization.h - Binary serialization of index data ----*- C++-*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides a compact binary serialization of indexed symbols.
+//
+// It writes two sections:
+// - a string table (which is compressed)
+// - lists of encoded symbols
+//
+// The format has a simple versioning scheme: the version is embedded in the
+// data and non-current versions are rejected when reading.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_RIFF_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_RIFF_H
+#include "Index.h"
+#include "llvm/Support/Error.h"
+
+namespace clang {
+namespace clangd {
+
+// Specifies the contents of an index file to be written.
+struct IndexFileOut {
+ const SymbolSlab *Symbols;
+ // TODO: Support serializing symbol occurrences.
+ // TODO: Support serializing Dex posting lists.
+};
+// Serializes an index file. (This is a RIFF container chunk).
+llvm::raw_ostream &operator<<(llvm::raw_ostream &, const IndexFileOut &);
+
+// Holds the contents of an index file that was read.
+struct IndexFileIn {
+ llvm::Optional<SymbolSlab> Symbols;
+};
+// Parse an index file. The input must be a RIFF container chunk.
+llvm::Expected<IndexFileIn> readIndexFile(llvm::StringRef);
+
+} // namespace clangd
+} // namespace clang
+
+#endif
Modified: clang-tools-extra/trunk/clangd/index/SymbolYAML.cpp
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/index/SymbolYAML.cpp?rev=341375&r1=341374&r2=341375&view=diff
==============================================================================
--- clang-tools-extra/trunk/clangd/index/SymbolYAML.cpp (original)
+++ clang-tools-extra/trunk/clangd/index/SymbolYAML.cpp Tue Sep 4 09:16:50 2018
@@ -9,6 +9,7 @@
#include "SymbolYAML.h"
#include "Index.h"
+#include "Serialization.h"
#include "dex/DexIndex.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallVector.h"
@@ -189,10 +190,20 @@ std::unique_ptr<SymbolIndex> loadIndex(l
llvm::errs() << "Can't open " << SymbolFile << "\n";
return nullptr;
}
- auto Slab = symbolsFromYAML(Buffer.get()->getBuffer());
+ StringRef Data = Buffer->get()->getBuffer();
- return UseDex ? dex::DexIndex::build(std::move(Slab))
- : MemIndex::build(std::move(Slab), RefSlab());
+ llvm::Optional<SymbolSlab> Slab;
+ if (Data.startswith("RIFF")) { // Magic for binary index file.
+ if (auto RIFF = readIndexFile(Data))
+ Slab = std::move(RIFF->Symbols);
+ else
+ llvm::errs() << "Bad RIFF: " << llvm::toString(RIFF.takeError()) << "\n";
+ } else {
+ Slab = symbolsFromYAML(Data);
+ }
+
+ return UseDex ? dex::DexIndex::build(std::move(*Slab))
+ : MemIndex::build(std::move(*Slab), RefSlab());
}
} // namespace clangd
Modified: clang-tools-extra/trunk/clangd/tool/ClangdMain.cpp
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/tool/ClangdMain.cpp?rev=341375&r1=341374&r2=341375&view=diff
==============================================================================
--- clang-tools-extra/trunk/clangd/tool/ClangdMain.cpp (original)
+++ clang-tools-extra/trunk/clangd/tool/ClangdMain.cpp Tue Sep 4 09:16:50 2018
@@ -10,6 +10,7 @@
#include "ClangdLSPServer.h"
#include "JSONRPCDispatcher.h"
#include "Path.h"
+#include "RIFF.h"
#include "Trace.h"
#include "index/SymbolYAML.h"
#include "index/dex/DexIndex.h"
Modified: clang-tools-extra/trunk/unittests/clangd/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/unittests/clangd/CMakeLists.txt?rev=341375&r1=341374&r2=341375&view=diff
==============================================================================
--- clang-tools-extra/trunk/unittests/clangd/CMakeLists.txt (original)
+++ clang-tools-extra/trunk/unittests/clangd/CMakeLists.txt Tue Sep 4 09:16:50 2018
@@ -26,6 +26,8 @@ add_extra_unittest(ClangdTests
HeadersTests.cpp
IndexTests.cpp
QualityTests.cpp
+ RIFFTests.cpp
+ SerializationTests.cpp
SourceCodeTests.cpp
SymbolCollectorTests.cpp
SyncAPI.cpp
Added: clang-tools-extra/trunk/unittests/clangd/RIFFTests.cpp
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/unittests/clangd/RIFFTests.cpp?rev=341375&view=auto
==============================================================================
--- clang-tools-extra/trunk/unittests/clangd/RIFFTests.cpp (added)
+++ clang-tools-extra/trunk/unittests/clangd/RIFFTests.cpp Tue Sep 4 09:16:50 2018
@@ -0,0 +1,39 @@
+//===-- RIFFTests.cpp - Binary container unit tests -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "RIFF.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+namespace clang {
+namespace clangd {
+namespace {
+using namespace llvm;
+using ::testing::ElementsAre;
+
+TEST(RIFFTest, File) {
+ riff::File File{riff::fourCC("test"),
+ {
+ {riff::fourCC("even"), "abcd"},
+ {riff::fourCC("oddd"), "abcde"},
+ }};
+ StringRef Serialized = StringRef("RIFF\x1e\0\0\0test"
+ "even\x04\0\0\0abcd"
+ "oddd\x05\0\0\0abcde\0",
+ 38);
+
+ EXPECT_EQ(llvm::to_string(File), Serialized);
+ auto Parsed = riff::readFile(Serialized);
+ ASSERT_TRUE(bool(Parsed)) << Parsed.takeError();
+ EXPECT_EQ(*Parsed, File);
+}
+
+} // namespace
+} // namespace clangd
+} // namespace clang
Added: clang-tools-extra/trunk/unittests/clangd/SerializationTests.cpp
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/unittests/clangd/SerializationTests.cpp?rev=341375&view=auto
==============================================================================
--- clang-tools-extra/trunk/unittests/clangd/SerializationTests.cpp (added)
+++ clang-tools-extra/trunk/unittests/clangd/SerializationTests.cpp Tue Sep 4 09:16:50 2018
@@ -0,0 +1,138 @@
+//===-- SerializationTests.cpp - Binary and YAML serialization unit tests -===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "index/Serialization.h"
+#include "index/SymbolYAML.h"
+#include "llvm/Support/ScopedPrinter.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+using testing::UnorderedElementsAre;
+using testing::UnorderedElementsAreArray;
+namespace clang {
+namespace clangd {
+namespace {
+
+const char *YAML1 = R"(
+---
+ID: 057557CEBF6E6B2DD437FBF60CC58F352D1DF856
+Name: 'Foo1'
+Scope: 'clang::'
+SymInfo:
+ Kind: Function
+ Lang: Cpp
+CanonicalDeclaration:
+ FileURI: file:///path/foo.h
+ Start:
+ Line: 1
+ Column: 0
+ End:
+ Line: 1
+ Column: 1
+IsIndexedForCodeCompletion: true
+Documentation: 'Foo doc'
+ReturnType: 'int'
+IncludeHeaders:
+ - Header: 'include1'
+ References: 7
+ - Header: 'include2'
+ References: 3
+...
+)";
+
+const char *YAML2 = R"(
+---
+ID: 057557CEBF6E6B2DD437FBF60CC58F352D1DF858
+Name: 'Foo2'
+Scope: 'clang::'
+SymInfo:
+ Kind: Function
+ Lang: Cpp
+CanonicalDeclaration:
+ FileURI: file:///path/bar.h
+ Start:
+ Line: 1
+ Column: 0
+ End:
+ Line: 1
+ Column: 1
+IsIndexedForCodeCompletion: false
+Signature: '-sig'
+CompletionSnippetSuffix: '-snippet'
+...
+)";
+
+MATCHER_P(QName, Name, "") { return (arg.Scope + arg.Name).str() == Name; }
+MATCHER_P2(IncludeHeaderWithRef, IncludeHeader, References, "") {
+ return (arg.IncludeHeader == IncludeHeader) && (arg.References == References);
+}
+
+TEST(SerializationTest, YAMLConversions) {
+ auto Symbols1 = symbolsFromYAML(YAML1);
+ ASSERT_EQ(Symbols1.size(), 1u);
+ const auto &Sym1 = *Symbols1.begin();
+ EXPECT_THAT(Sym1, QName("clang::Foo1"));
+ EXPECT_EQ(Sym1.Signature, "");
+ EXPECT_EQ(Sym1.Documentation, "Foo doc");
+ EXPECT_EQ(Sym1.ReturnType, "int");
+ EXPECT_EQ(Sym1.CanonicalDeclaration.FileURI, "file:///path/foo.h");
+ EXPECT_TRUE(Sym1.IsIndexedForCodeCompletion);
+ EXPECT_THAT(Sym1.IncludeHeaders,
+ UnorderedElementsAre(IncludeHeaderWithRef("include1", 7u),
+ IncludeHeaderWithRef("include2", 3u)));
+
+ auto Symbols2 = symbolsFromYAML(YAML2);
+ ASSERT_EQ(Symbols2.size(), 1u);
+ const auto &Sym2 = *Symbols2.begin();
+ EXPECT_THAT(Sym2, QName("clang::Foo2"));
+ EXPECT_EQ(Sym2.Signature, "-sig");
+ EXPECT_EQ(Sym2.ReturnType, "");
+ EXPECT_EQ(Sym2.CanonicalDeclaration.FileURI, "file:///path/bar.h");
+ EXPECT_FALSE(Sym2.IsIndexedForCodeCompletion);
+
+ std::string ConcatenatedYAML;
+ {
+ llvm::raw_string_ostream OS(ConcatenatedYAML);
+ SymbolsToYAML(Symbols1, OS);
+ SymbolsToYAML(Symbols2, OS);
+ }
+ auto ConcatenatedSymbols = symbolsFromYAML(ConcatenatedYAML);
+ EXPECT_THAT(ConcatenatedSymbols,
+ UnorderedElementsAre(QName("clang::Foo1"), QName("clang::Foo2")));
+}
+
+std::vector<std::string> YAMLFromSymbols(const SymbolSlab &Slab) {
+ std::vector<std::string> Result;
+ for (const auto &Sym : Slab)
+ Result.push_back(SymbolToYAML(Sym));
+ return Result;
+}
+
+TEST(SerializationTest, BinaryConversions) {
+ // We reuse the test symbols from YAML.
+ auto Slab = symbolsFromYAML(std::string(YAML1) + YAML2);
+ ASSERT_EQ(Slab.size(), 2u);
+
+ // Write to binary format, and parse again.
+ IndexFileOut Out;
+ Out.Symbols = &Slab;
+ std::string Serialized = llvm::to_string(Out);
+
+ auto In = readIndexFile(Serialized);
+ ASSERT_TRUE(bool(In)) << In.takeError();
+ ASSERT_TRUE(In->Symbols);
+
+ // Assert the YAML serializations match, for nice comparisons and diffs.
+ EXPECT_THAT(YAMLFromSymbols(*In->Symbols),
+ UnorderedElementsAreArray(YAMLFromSymbols(Slab)));
+}
+
+} // namespace
+} // namespace clangd
+} // namespace clang
Modified: clang-tools-extra/trunk/unittests/clangd/SymbolCollectorTests.cpp
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/unittests/clangd/SymbolCollectorTests.cpp?rev=341375&r1=341374&r2=341375&view=diff
==============================================================================
--- clang-tools-extra/trunk/unittests/clangd/SymbolCollectorTests.cpp (original)
+++ clang-tools-extra/trunk/unittests/clangd/SymbolCollectorTests.cpp Tue Sep 4 09:16:50 2018
@@ -48,7 +48,6 @@ using testing::UnorderedElementsAreArray
MATCHER_P(Labeled, Label, "") {
return (arg.Name + arg.Signature).str() == Label;
}
-MATCHER(HasReturnType, "") { return !arg.ReturnType.empty(); }
MATCHER_P(ReturnType, D, "") { return arg.ReturnType == D; }
MATCHER_P(Doc, D, "") { return arg.Documentation == D; }
MATCHER_P(Snippet, S, "") {
@@ -744,84 +743,6 @@ TEST_F(SymbolCollectorTest, Snippet) {
Snippet("ff(${1:int x}, ${2:double y})"))));
}
-TEST_F(SymbolCollectorTest, YAMLConversions) {
- const std::string YAML1 = R"(
----
-ID: 057557CEBF6E6B2DD437FBF60CC58F352D1DF856
-Name: 'Foo1'
-Scope: 'clang::'
-SymInfo:
- Kind: Function
- Lang: Cpp
-CanonicalDeclaration:
- FileURI: file:///path/foo.h
- Start:
- Line: 1
- Column: 0
- End:
- Line: 1
- Column: 1
-IsIndexedForCodeCompletion: true
-Documentation: 'Foo doc'
-ReturnType: 'int'
-IncludeHeaders:
- - Header: 'include1'
- References: 7
- - Header: 'include2'
- References: 3
-...
-)";
- const std::string YAML2 = R"(
----
-ID: 057557CEBF6E6B2DD437FBF60CC58F352D1DF858
-Name: 'Foo2'
-Scope: 'clang::'
-SymInfo:
- Kind: Function
- Lang: Cpp
-CanonicalDeclaration:
- FileURI: file:///path/bar.h
- Start:
- Line: 1
- Column: 0
- End:
- Line: 1
- Column: 1
-IsIndexedForCodeCompletion: false
-Signature: '-sig'
-CompletionSnippetSuffix: '-snippet'
-...
-)";
-
- auto Symbols1 = symbolsFromYAML(YAML1);
-
- EXPECT_THAT(Symbols1,
- UnorderedElementsAre(AllOf(QName("clang::Foo1"), Labeled("Foo1"),
- Doc("Foo doc"), ReturnType("int"),
- DeclURI("file:///path/foo.h"),
- ForCodeCompletion(true))));
- auto &Sym1 = *Symbols1.begin();
- EXPECT_THAT(Sym1.IncludeHeaders,
- UnorderedElementsAre(IncludeHeaderWithRef("include1", 7u),
- IncludeHeaderWithRef("include2", 3u)));
- auto Symbols2 = symbolsFromYAML(YAML2);
- EXPECT_THAT(Symbols2, UnorderedElementsAre(AllOf(
- QName("clang::Foo2"), Labeled("Foo2-sig"),
- Not(HasReturnType()), DeclURI("file:///path/bar.h"),
- ForCodeCompletion(false))));
-
- std::string ConcatenatedYAML;
- {
- llvm::raw_string_ostream OS(ConcatenatedYAML);
- SymbolsToYAML(Symbols1, OS);
- SymbolsToYAML(Symbols2, OS);
- }
- auto ConcatenatedSymbols = symbolsFromYAML(ConcatenatedYAML);
- EXPECT_THAT(ConcatenatedSymbols,
- UnorderedElementsAre(QName("clang::Foo1"),
- QName("clang::Foo2")));
-}
-
TEST_F(SymbolCollectorTest, IncludeHeaderSameAsFileURI) {
CollectorOpts.CollectIncludePath = true;
runSymbolCollector("class Foo {};", /*Main=*/"");
More information about the cfe-commits
mailing list