[clang-tools-extra] r341375 - [clangd] Define a compact binary serialization fomat for symbol slab/index.

Mikael Holmén via cfe-commits cfe-commits at lists.llvm.org
Wed Sep 5 05:20:47 PDT 2018



On 09/05/2018 12:41 PM, Sam McCall wrote:
> Thanks. Unclear to me whether it's the enum class or the anonymous 
> namespace that's triggering this (I believe) compiler bug, but r341459 
> may help...

Still doesn't work.

In file included from 
../tools/clang/include/clang/Frontend/CommandLineSourceLoc.h:19:0,
                  from 
../tools/clang/include/clang/Frontend/FrontendOptions.h:13,
                  from 
../tools/clang/include/clang/Frontend/CompilerInvocation.h:19,
                  from 
../tools/clang/include/clang/Frontend/CompilerInstance.h:16,
                  from 
../tools/clang/tools/extra/clangd/global-symbol-builder/GlobalSymbolBuilderMain.cpp:22:
../include/llvm/Support/CommandLine.h:606:29: error: expected 
primary-expression before '{' token
    llvm::cl::OptionEnumValue { FLAGNAME, int(ENUMVAL), DESC }
                              ^
../tools/clang/tools/extra/clangd/global-symbol-builder/GlobalSymbolBuilderMain.cpp:67:16: 
note: in expansion of macro 'clEnumValN'
                 clEnumValN(Format::YAML, "yaml", "human-readable YAML 
format"),
                 ^
../include/llvm/Support/CommandLine.h:606:29: error: expected 
primary-expression before '{' token
    llvm::cl::OptionEnumValue { FLAGNAME, int(ENUMVAL), DESC }
                              ^
../tools/clang/tools/extra/clangd/global-symbol-builder/GlobalSymbolBuilderMain.cpp:68:16: 
note: in expansion of macro 'clEnumValN'
                 clEnumValN(Format::Binary, "binary", "binary RIFF 
format")),
                 ^
../tools/clang/tools/extra/clangd/global-symbol-builder/GlobalSymbolBuilderMain.cpp:69:27: 
error: 'Format' is not a class, namespace, or enumeration
             llvm::cl::init(Format::YAML));
                            ^
../tools/clang/tools/extra/clangd/global-symbol-builder/GlobalSymbolBuilderMain.cpp: 
In function 'int main(int, const char**)':
../tools/clang/tools/extra/clangd/global-symbol-builder/GlobalSymbolBuilderMain.cpp:276:23: 
error: 'clang::clangd::Format' is not a class, namespace, or enumeration
    case clang::clangd::Format::YAML:
                        ^
../tools/clang/tools/extra/clangd/global-symbol-builder/GlobalSymbolBuilderMain.cpp:279:23: 
error: 'clang::clangd::Format' is not a class, namespace, or enumeration
    case clang::clangd::Format::Binary: {
                        ^
../tools/clang/tools/extra/clangd/global-symbol-builder/GlobalSymbolBuilderMain.cpp:275:10: 
warning: enumeration value 'YAML' not handled in switch [-Wswitch]
    switch (clang::clangd::Format) {
           ^
../tools/clang/tools/extra/clangd/global-symbol-builder/GlobalSymbolBuilderMain.cpp:275:10: 
warning: enumeration value 'Binary' not handled in switch [-Wswitch]

Changing the name of the enum from Format to Formats (so the name isn't 
the same as the variable) makes it compile.

I.e:

@@ -58,17 +58,17 @@ static llvm::cl::opt<bool> MergeOnTheFly(
          "usage and an almost instant reduce stage. Optimal for running 
as a "
          "standalone tool, but cannot be used with multi-process 
executors like "
          "MapReduce."),
      llvm::cl::init(true), llvm::cl::Hidden);

-enum Format { YAML, Binary };
-static llvm::cl::opt<Format>
+enum Formats { YAML, Binary };
+static llvm::cl::opt<Formats>
      Format("format", llvm::cl::desc("Format of the index to be written"),
             llvm::cl::values(
-               clEnumValN(Format::YAML, "yaml", "human-readable YAML 
format"),
-               clEnumValN(Format::Binary, "binary", "binary RIFF format")),
-           llvm::cl::init(Format::YAML));
+               clEnumValN(Formats::YAML, "yaml", "human-readable YAML 
format"),
+               clEnumValN(Formats::Binary, "binary", "binary RIFF 
format")),
+           llvm::cl::init(Formats::YAML));

  /// Responsible for aggregating symbols from each processed file and 
producing
  /// the final results. All methods in this class must be thread-safe,
  /// 'consumeSymbols' may be called from multiple threads.
  class SymbolsConsumer {
@@ -271,14 +271,14 @@ int main(int argc, const char **argv) {
    }
    // Reduce phase: combine symbols with the same IDs.
    auto UniqueSymbols = Consumer->mergeResults();
    // Output phase: emit result symbols.
    switch (clang::clangd::Format) {
-  case clang::clangd::Format::YAML:
+  case clang::clangd::Formats::YAML:
      SymbolsToYAML(UniqueSymbols, llvm::outs());
      break;
-  case clang::clangd::Format::Binary: {
+  case clang::clangd::Formats::Binary: {
      clang::clangd::IndexFileOut Out;
      Out.Symbols = &UniqueSymbols;
      llvm::outs() << Out;
    }
    }

seems to compile with gcc 5.4.0.

I've no idea if this is a gcc bug or if it's a bug in clang to not also 
complain about it.

/Mikael

> 
> On Wed, Sep 5, 2018 at 11:05 AM Mikael Holmén
> <mikael.holmen at ericsson.com> wrote:
> 
> 
> 
>     On 09/05/2018 09:56 AM, Sam McCall wrote:
>      > Sorry! r341451 should fix this, will keep an eye on the buildbots.
>      >
> 
>     Now it compiles with clang 3.6.0 but with gcc 5.4.0 it fails with
> 
>     /proj/bbi_twh/wh_bbi/x86_64-Linux2/bbigcc/1.5.4.0/crosscompiler/bin/g++
>     -I/proj/bbi_twh/wh_bbi/x86_64-Linux2/bbilibxml2/1/include
>     -DGTEST_HAS_RTTI=0 -D_DEBUG -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS
>     -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS
>     -Itools/clang/tools/extra/clangd/global-symbol-builder
>     -I../tools/clang/tools/extra/clangd/global-symbol-builder
>     -I../tools/clang/include -Itools/clang/include -I/usr/include/libxml2
>     -Iinclude -I../include
>     -I../tools/clang/tools/extra/clangd/global-symbol-builder/..
>     -I/repo/app/valgrind/3.11.0/include  -fPIC -fvisibility-inlines-hidden
>     -Werror=date-time -std=c++11 -Wall -Wextra -Wno-unused-parameter
>     -Wwrite-strings -Wcast-qual -Wno-missing-field-initializers -pedantic
>     -Wno-long-long -Wno-maybe-uninitialized -Wdelete-non-virtual-dtor
>     -Wno-comment -fdiagnostics-color -ffunction-sections -fdata-sections
>     -fno-common -Woverloaded-virtual -fno-strict-aliasing -O3    -UNDEBUG
>     -fno-exceptions -fno-rtti -MMD -MT
>     tools/clang/tools/extra/clangd/global-symbol-builder/CMakeFiles/global-symbol-builder.dir/GlobalSymbolBuilderMain.cpp.o
> 
>     -MF
>     tools/clang/tools/extra/clangd/global-symbol-builder/CMakeFiles/global-symbol-builder.dir/GlobalSymbolBuilderMain.cpp.o.d
> 
>     -o
>     tools/clang/tools/extra/clangd/global-symbol-builder/CMakeFiles/global-symbol-builder.dir/GlobalSymbolBuilderMain.cpp.o
> 
>     -c
>     ../tools/clang/tools/extra/clangd/global-symbol-builder/GlobalSymbolBuilderMain.cpp
>     In file included from
>     ../tools/clang/include/clang/Frontend/CommandLineSourceLoc.h:19:0,
>                        from
>     ../tools/clang/include/clang/Frontend/FrontendOptions.h:13,
>                        from
>     ../tools/clang/include/clang/Frontend/CompilerInvocation.h:19,
>                        from
>     ../tools/clang/include/clang/Frontend/CompilerInstance.h:16,
>                        from
>     ../tools/clang/tools/extra/clangd/global-symbol-builder/GlobalSymbolBuilderMain.cpp:22:
>     ../include/llvm/Support/CommandLine.h:606:52: error: invalid cast from
>     type 'llvm::cl::opt<clang::clangd::{anonymous}::Format>' to type 'int'
>          llvm::cl::OptionEnumValue { FLAGNAME, int(ENUMVAL), DESC }
>                                                           ^
>     ../tools/clang/tools/extra/clangd/global-symbol-builder/GlobalSymbolBuilderMain.cpp:67:16:
> 
>     note: in expansion of macro 'clEnumValN'
>                       clEnumValN(Format::YAML, "yaml", "human-readable YAML
>     format"),
>                       ^
>     ../include/llvm/Support/CommandLine.h:606:29: error: expected
>     primary-expression before '{' token
>          llvm::cl::OptionEnumValue { FLAGNAME, int(ENUMVAL), DESC }
>                                    ^
>     ../tools/clang/tools/extra/clangd/global-symbol-builder/GlobalSymbolBuilderMain.cpp:67:16:
> 
>     note: in expansion of macro 'clEnumValN'
>                       clEnumValN(Format::YAML, "yaml", "human-readable YAML
>     format"),
>                       ^
>     ../include/llvm/Support/CommandLine.h:606:52: error: invalid cast from
>     type 'llvm::cl::opt<clang::clangd::{anonymous}::Format>' to type 'int'
>          llvm::cl::OptionEnumValue { FLAGNAME, int(ENUMVAL), DESC }
>                                                           ^
>     ../tools/clang/tools/extra/clangd/global-symbol-builder/GlobalSymbolBuilderMain.cpp:68:16:
> 
>     note: in expansion of macro 'clEnumValN'
>                       clEnumValN(Format::Binary, "binary", "binary RIFF
>     format")),
>                       ^
>     ../include/llvm/Support/CommandLine.h:606:29: error: expected
>     primary-expression before '{' token
>          llvm::cl::OptionEnumValue { FLAGNAME, int(ENUMVAL), DESC }
>                                    ^
>     ../tools/clang/tools/extra/clangd/global-symbol-builder/GlobalSymbolBuilderMain.cpp:68:16:
> 
>     note: in expansion of macro 'clEnumValN'
>                       clEnumValN(Format::Binary, "binary", "binary RIFF
>     format")),
>                       ^
>     ../tools/clang/tools/extra/clangd/global-symbol-builder/GlobalSymbolBuilderMain.cpp:69:27:
> 
>     error: 'Format' is not a class, namespace, or enumeration
>                   llvm::cl::init(Format::YAML));
>                                  ^
>     ../tools/clang/tools/extra/clangd/global-symbol-builder/GlobalSymbolBuilderMain.cpp:
> 
>     In function 'int main(int, const char**)':
>     ../tools/clang/tools/extra/clangd/global-symbol-builder/GlobalSymbolBuilderMain.cpp:276:23:
> 
>     error: 'clang::clangd::Format' is not a class, namespace, or enumeration
>          case clang::clangd::Format::YAML:
>                              ^
>     ../tools/clang/tools/extra/clangd/global-symbol-builder/GlobalSymbolBuilderMain.cpp:279:23:
> 
>     error: 'clang::clangd::Format' is not a class, namespace, or enumeration
>          case clang::clangd::Format::Binary: {
>                              ^
>     ../tools/clang/tools/extra/clangd/global-symbol-builder/GlobalSymbolBuilderMain.cpp:275:10:
> 
>     warning: enumeration value 'YAML' not handled in switch [-Wswitch]
>          switch (clang::clangd::Format) {
>                 ^
>     ../tools/clang/tools/extra/clangd/global-symbol-builder/GlobalSymbolBuilderMain.cpp:275:10:
> 
>     warning: enumeration value 'Binary' not handled in switch [-Wswitch]
> 
>     /Mikael
> 
>      > On Wed, Sep 5, 2018 at 8:46 AM Mikael Holmén
>      > <mikael.holmen at ericsson.com> wrote:
>      >
>      >     Hi Sam,
>      >
>      >     This doesn't compile for me. Both clang 3.6.0 and gcc 5.4.0
>     complain:
>      >
>      >     [1/6] Building CXX object
>      >   
>       tools/clang/tools/extra/clangd/CMakeFiles/clangDaemon.dir/index/Serialization.cpp.o
>      >     FAILED:
>      >   
>       tools/clang/tools/extra/clangd/CMakeFiles/clangDaemon.dir/index/Serialization.cpp.o
>      >
>      >
>      >     /usr/bin/clang++  -march=corei7  -DGTEST_HAS_RTTI=0 -D_DEBUG
>      >     -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS
>      >     -D__STDC_LIMIT_MACROS -Itools/clang/tools/extra/clangd
>      >     -I../tools/clang/tools/extra/clangd -I../tools/clang/include
>      >     -Itools/clang/include -I/usr/include/libxml2 -Iinclude
>     -I../include
>      >     -I/proj/flexasic/app/valgrind/3.11.0/include  -fPIC
>      >     -fvisibility-inlines-hidden -Werror -Werror=date-time
>     -std=c++11 -Wall
>      >     -Wextra -Wno-unused-parameter -Wwrite-strings -Wcast-qual
>      >     -Wmissing-field-initializers -pedantic -Wno-long-long
>      >     -Wcovered-switch-default -Wnon-virtual-dtor
>     -Wdelete-non-virtual-dtor
>      >     -Wstring-conversion -fdiagnostics-color -ffunction-sections
>      >     -fdata-sections -fno-common -Woverloaded-virtual
>     -Wno-nested-anon-types
>      >     -O3    -UNDEBUG  -fno-exceptions -fno-rtti -MMD -MT
>      >   
>       tools/clang/tools/extra/clangd/CMakeFiles/clangDaemon.dir/index/Serialization.cpp.o
>      >
>      >     -MF
>      >   
>       tools/clang/tools/extra/clangd/CMakeFiles/clangDaemon.dir/index/Serialization.cpp.o.d
>      >
>      >     -o
>      >   
>       tools/clang/tools/extra/clangd/CMakeFiles/clangDaemon.dir/index/Serialization.cpp.o
>      >
>      >     -c ../tools/clang/tools/extra/clangd/index/Serialization.cpp
>      >     ../tools/clang/tools/extra/clangd/index/Serialization.cpp:154:10:
>      >     error:
>      >     no viable conversion from 'clang::clangd::(anonymous
>      >     namespace)::StringTableIn' to 'Expected<clang::clangd::(anonymous
>      >     namespace)::StringTableIn>'
>      >         return Table;
>      >                ^~~~~
>      >     ../include/llvm/Support/Error.h:434:41: note: candidate
>     constructor
>      >     (the
>      >     implicit copy constructor) not viable: no known conversion from
>      >     'clang::clangd::(anonymous namespace)::StringTableIn' to 'const
>      >     llvm::Expected<clang::clangd::(anonymous
>     namespace)::StringTableIn> &'
>      >     for 1st argument
>      >     template <class T> class LLVM_NODISCARD Expected {
>      >                                               ^
>      >     ../include/llvm/Support/Error.h:456:3: note: candidate
>     constructor not
>      >     viable: no known conversion from 'clang::clangd::(anonymous
>      >     namespace)::StringTableIn' to 'llvm::Error' for 1st argument
>      >         Expected(Error Err)
>      >         ^
>      >     ../include/llvm/Support/Error.h:470:3: note: candidate
>     constructor not
>      >     viable: no known conversion from 'clang::clangd::(anonymous
>      >     namespace)::StringTableIn' to 'llvm::ErrorSuccess' for 1st
>     argument
>      >         Expected(ErrorSuccess) = delete;
>      >         ^
>      >     ../include/llvm/Support/Error.h:488:3: note: candidate
>     constructor not
>      >     viable: no known conversion from 'clang::clangd::(anonymous
>      >     namespace)::StringTableIn' to
>     'llvm::Expected<clang::clangd::(anonymous
>      >     namespace)::StringTableIn> &&' for 1st argument
>      >         Expected(Expected &&Other) {
>     moveConstruct(std::move(Other)); }
>      >         ^
>      >     ../include/llvm/Support/Error.h:476:36: note: candidate template
>      >     ignored: disabled by 'enable_if' [with OtherT =
>      >     clang::clangd::(anonymous namespace)::StringTableIn &]
>      >                  typename std::enable_if<std::is_convertible<OtherT,
>      >     T>::value>::type
>      >                                          ^
>      >     ../include/llvm/Support/Error.h:493:3: note: candidate template
>      >     ignored:
>      >     could not match 'Expected<type-parameter-0-0>' against
>      >     'clang::clangd::(anonymous namespace)::StringTableIn'
>      >         Expected(Expected<OtherT> &&Other,
>      >         ^
>      >     In file included from
>      >     ../tools/clang/tools/extra/clangd/index/Serialization.cpp:9:
>      >     In file included from
>      >     ../tools/clang/tools/extra/clangd/index/Serialization.h:23:
>      >     In file included from
>      >     ../tools/clang/tools/extra/clangd/index/Index.h:13:
>      >     In file included from
>      >     ../tools/clang/include/clang/Index/IndexSymbol.h:14:
>      >     In file included from
>     ../tools/clang/include/clang/Lex/MacroInfo.h:18:
>      >     In file included from
>     ../tools/clang/include/clang/Lex/Token.h:17:
>      >     In file included from
>      >     ../tools/clang/include/clang/Basic/SourceLocation.h:19:
>      >     In file included from ../include/llvm/ADT/StringRef.h:13:
>      >     In file included from ../include/llvm/ADT/STLExtras.h:20:
>      >     ../include/llvm/ADT/Optional.h:41:28: error: call to
>     implicitly-deleted
>      >     copy constructor of 'clang::clangd::SymbolSlab'
>      >             new (storage.buffer) T(*O.getPointer());
>      >                                  ^ ~~~~~~~~~~~~~~~
>      >     ../include/llvm/ADT/Optional.h:141:3: note: in instantiation
>     of member
>      >     function
>      >   
>       'llvm::optional_detail::OptionalStorage<clang::clangd::SymbolSlab,
>      >     false>::OptionalStorage' requested here
>      >         Optional(const Optional &O) = default;
>      >         ^
>      >   
>       ../tools/clang/tools/extra/clangd/index/Serialization.cpp:325:10:
>     note:
>      >     in instantiation of function template specialization
>      >   
>       'llvm::Expected<clang::clangd::IndexFileIn>::Expected<clang::clangd::IndexFileIn
>      >
>      >     &>' requested here
>      >         return Result;
>      >                ^
>      >     ../tools/clang/tools/extra/clangd/index/Index.h:324:26: note:
>     copy
>      >     constructor of 'SymbolSlab' is implicitly deleted because
>     field 'Arena'
>      >     has a deleted copy constructor
>      >         llvm::BumpPtrAllocator Arena; // Owns Symbol data that the
>      >     Symbols do
>      >     not.
>      >                                ^
>      >     ../include/llvm/Support/Allocator.h:157:3: note: copy
>     constructor is
>      >     implicitly deleted because
>     'BumpPtrAllocatorImpl<llvm::MallocAllocator,
>      >     4096, 4096>' has a user-declared move constructor
>      >         BumpPtrAllocatorImpl(BumpPtrAllocatorImpl &&Old)
>      >         ^
>      >     2 errors generated.
>      >
>      >     Several buildbots fail the same way.
>      >
>      >     /Mikael
>      >
>      >     On 09/04/2018 06:16 PM, Sam McCall via cfe-commits wrote:
>      >      > Author: sammccall
>      >      > Date: Tue Sep  4 09:16:50 2018
>      >      > New Revision: 341375
>      >      >
>      >      > URL: http://llvm.org/viewvc/llvm-project?rev=341375&view=rev
>      >      > Log:
>      >      > [clangd] Define a compact binary serialization fomat for
>     symbol
>      >     slab/index.
>      >      >
>      >      > Summary:
>      >      > This is intended to replace the current YAML format for
>     general use.
>      >      > It's ~10x more compact than YAML, and ~40% more compact than
>      >     gzipped YAML:
>      >      >    llvmidx.riff = 20M, llvmidx.yaml = 272M,
>     llvmidx.yaml.gz = 32M
>      >      > It's also simpler/faster to read and write.
>      >      >
>      >      > The format is a RIFF container (chunks of (type, size,
>     data)) with:
>      >      >   - a compressed string table
>      >      >   - simple binary encoding of symbols (with varints for
>     compactness)
>      >      > It can be extended to include occurrences, Dex posting
>     lists, etc.
>      >      >
>      >      > There's no rich backwards-compatibility scheme, but a version
>      >     number is included
>      >      > so we can detect incompatible files and do ad-hoc back-compat.
>      >      >
>      >      > Alternatives considered:
>      >      >   - compressed YAML or JSON: bulky and slow to load
>      >      >   - llvm bitstream: confusing model and libraries are hard to
>      >     use. My attempt
>      >      >     produced slightly larger files, and the code was
>     longer and
>      >     slower.
>      >      >   - protobuf or similar: would be really nice (esp for
>      >     back-compat) but the
>      >      >     dependency is a big hassle
>      >      >   - ad-hoc binary format without a container: it seems clear
>      >     we're going
>      >      >     to add posting lists and occurrences here, and that
>     they will
>      >     benefit
>      >      >     from sharing a string table. The container makes it
>     easy to debug
>      >      >     these pieces in isolation, and make them optional.
>      >      >
>      >      > Reviewers: ioeric
>      >      >
>      >      > Subscribers: mgorny, ilya-biryukov, MaskRay, jkorous, mgrang,
>      >     arphaman, kadircet, cfe-commits
>      >      >
>      >      > Differential Revision: https://reviews.llvm.org/D51585
>      >      >
>      >      > Added:
>      >      >      clang-tools-extra/trunk/clangd/RIFF.cpp
>      >      >      clang-tools-extra/trunk/clangd/RIFF.h
>      >      >      clang-tools-extra/trunk/clangd/index/Serialization.cpp
>      >      >      clang-tools-extra/trunk/clangd/index/Serialization.h
>      >      >      clang-tools-extra/trunk/unittests/clangd/RIFFTests.cpp
>      >      >     
>     clang-tools-extra/trunk/unittests/clangd/SerializationTests.cpp
>      >      > Modified:
>      >      >      clang-tools-extra/trunk/clangd/CMakeLists.txt
>      >      >
>      >   
>       clang-tools-extra/trunk/clangd/global-symbol-builder/GlobalSymbolBuilderMain.cpp
>      >      >      clang-tools-extra/trunk/clangd/index/Index.cpp
>      >      >      clang-tools-extra/trunk/clangd/index/Index.h
>      >      >      clang-tools-extra/trunk/clangd/index/SymbolYAML.cpp
>      >      >      clang-tools-extra/trunk/clangd/tool/ClangdMain.cpp
>      >      >      clang-tools-extra/trunk/unittests/clangd/CMakeLists.txt
>      >      >
>      >     clang-tools-extra/trunk/unittests/clangd/SymbolCollectorTests.cpp
>      >      >
>      >      > Modified: clang-tools-extra/trunk/clangd/CMakeLists.txt
>      >      > URL:
>      >
>     http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/CMakeLists.txt?rev=341375&r1=341374&r2=341375&view=diff
>      >      >
>      >   
>       ==============================================================================
>      >      > --- clang-tools-extra/trunk/clangd/CMakeLists.txt (original)
>      >      > +++ clang-tools-extra/trunk/clangd/CMakeLists.txt Tue Sep  4
>      >     09:16:50 2018
>      >      > @@ -29,6 +29,7 @@ add_clang_library(clangDaemon
>      >      >     Protocol.cpp
>      >      >     ProtocolHandlers.cpp
>      >      >     Quality.cpp
>      >      > +  RIFF.cpp
>      >      >     SourceCode.cpp
>      >      >     Threading.cpp
>      >      >     Trace.cpp
>      >      > @@ -41,6 +42,7 @@ add_clang_library(clangDaemon
>      >      >     index/Index.cpp
>      >      >     index/MemIndex.cpp
>      >      >     index/Merge.cpp
>      >      > +  index/Serialization.cpp
>      >      >     index/SymbolCollector.cpp
>      >      >     index/SymbolYAML.cpp
>      >      >
>      >      >
>      >      > Added: clang-tools-extra/trunk/clangd/RIFF.cpp
>      >      > URL:
>      >
>     http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/RIFF.cpp?rev=341375&view=auto
>      >      >
>      >   
>       ==============================================================================
>      >      > --- clang-tools-extra/trunk/clangd/RIFF.cpp (added)
>      >      > +++ clang-tools-extra/trunk/clangd/RIFF.cpp Tue Sep  4
>     09:16:50 2018
>      >      > @@ -0,0 +1,88 @@
>      >      > +//===--- RIFF.cpp - Binary container file format
>      >     --------------------------===//
>      >      > +//
>      >      > +//                     The LLVM Compiler Infrastructure
>      >      > +//
>      >      > +// This file is distributed under the University of Illinois
>      >     Open Source
>      >      > +// License. See LICENSE.TXT for details.
>      >      > +//
>      >      >
>      >   
>       +//===----------------------------------------------------------------------===//
>      >      > +
>      >      > +#include "RIFF.h"
>      >      > +#include "llvm/Support/Endian.h"
>      >      > +
>      >      > +using namespace llvm;
>      >      > +namespace clang {
>      >      > +namespace clangd {
>      >      > +namespace riff {
>      >      > +
>      >      > +static Error makeError(const char *Msg) {
>      >      > +  return createStringError(inconvertibleErrorCode(), Msg);
>      >      > +}
>      >      > +
>      >      > +Expected<Chunk> readChunk(StringRef &Stream) {
>      >      > +  if (Stream.size() < 8)
>      >      > +    return makeError("incomplete chunk header");
>      >      > +  Chunk C;
>      >      > +  std::copy(Stream.begin(), Stream.begin() + 4,
>     C.ID.begin());
>      >      > +  Stream = Stream.drop_front(4);
>      >      > +  uint32_t Len =
>      >     support::endian::read32le(Stream.take_front(4).begin());
>      >      > +  Stream = Stream.drop_front(4);
>      >      > +  if (Stream.size() < Len)
>      >      > +    return makeError("truncated chunk");
>      >      > +  C.Data = Stream.take_front(Len);
>      >      > +  Stream = Stream.drop_front(Len);
>      >      > +  if (Len % 2 & !Stream.empty()) { // Skip padding byte.
>      >      > +    if (Stream.front())
>      >      > +      return makeError("nonzero padding byte");
>      >      > +    Stream = Stream.drop_front();
>      >      > +  }
>      >      > +  return C;
>      >      > +};
>      >      > +
>      >      > +raw_ostream &operator<<(raw_ostream &OS, const Chunk &C) {
>      >      > +  OS.write(C.ID.begin(), C.ID.size());
>      >      > +  char Size[4];
>      >      > +  llvm::support::endian::write32le(Size, C.Data.size());
>      >      > +  OS.write(Size, sizeof(Size));
>      >      > +  OS << C.Data;
>      >      > +  if (C.Data.size() % 2)
>      >      > +    OS.write(0);
>      >      > +  return OS;
>      >      > +}
>      >      > +
>      >      > +llvm::Expected<File> readFile(llvm::StringRef Stream) {
>      >      > +  auto RIFF = readChunk(Stream);
>      >      > +  if (!RIFF)
>      >      > +    return RIFF.takeError();
>      >      > +  if (RIFF->ID != fourCC("RIFF"))
>      >      > +    return makeError("not a RIFF container");
>      >      > +  if (RIFF->Data.size() < 4)
>      >      > +    return makeError("RIFF chunk too short");
>      >      > +  File F;
>      >      > +  std::copy(RIFF->Data.begin(), RIFF->Data.begin() + 4,
>      >     F.Type.begin());
>      >      > +  for (llvm::StringRef Body = RIFF->Data.drop_front(4);
>      >     !Body.empty();)
>      >      > +    if (auto Chunk = readChunk(Body)) {
>      >      > +      F.Chunks.push_back(*Chunk);
>      >      > +    } else
>      >      > +      return Chunk.takeError();
>      >      > +  return F;
>      >      > +}
>      >      > +
>      >      > +raw_ostream &operator<<(raw_ostream &OS, const File &F) {
>      >      > +  // To avoid copies, we serialize the outer RIFF chunk
>     "by hand".
>      >      > +  size_t DataLen = 4; // Predict length of RIFF chunk data.
>      >      > +  for (const auto &C : F.Chunks)
>      >      > +    DataLen += 4 + 4 + C.Data.size() + (C.Data.size() % 2);
>      >      > +  OS << "RIFF";
>      >      > +  char Size[4];
>      >      > +  llvm::support::endian::write32le(Size, DataLen);
>      >      > +  OS.write(Size, sizeof(Size));
>      >      > +  OS.write(F.Type.begin(), F.Type.size());
>      >      > +  for (const auto &C : F.Chunks)
>      >      > +    OS << C;
>      >      > +  return OS;
>      >      > +}
>      >      > +
>      >      > +} // namespace riff
>      >      > +} // namespace clangd
>      >      > +} // namespace clang
>      >      >
>      >      > Added: clang-tools-extra/trunk/clangd/RIFF.h
>      >      > URL:
>      >
>     http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/RIFF.h?rev=341375&view=auto
>      >      >
>      >   
>       ==============================================================================
>      >      > --- clang-tools-extra/trunk/clangd/RIFF.h (added)
>      >      > +++ clang-tools-extra/trunk/clangd/RIFF.h Tue Sep  4
>     09:16:50 2018
>      >      > @@ -0,0 +1,81 @@
>      >      > +//===--- RIFF.h - Binary container file format
>      >     -------------------*- C++-*-===//
>      >      > +//
>      >      > +//                     The LLVM Compiler Infrastructure
>      >      > +//
>      >      > +// This file is distributed under the University of Illinois
>      >     Open Source
>      >      > +// License. See LICENSE.TXT for details.
>      >      > +//
>      >      >
>      >   
>       +//===----------------------------------------------------------------------===//
>      >      > +//
>      >      > +// Tools for reading and writing data in RIFF containers.
>      >      > +//
>      >      > +// A chunk consists of:
>      >      > +//   - ID      : char[4]
>      >      > +//   - Length  : uint32
>      >      > +//   - Data    : byte[Length]
>      >      > +//   - Padding : byte[Length % 2]
>      >      > +// The semantics of a chunk's Data are determined by its ID.
>      >      > +// The format makes it easy to skip over uninteresting or
>      >     unknown chunks.
>      >      > +//
>      >      > +// A RIFF file is a single chunk with ID "RIFF". Its Data is:
>      >      > +//   - Type    : char[4]
>      >      > +//   - Chunks  : chunk[]
>      >      > +//
>      >      > +// This means that a RIFF file consists of:
>      >      > +//   - "RIFF"          : char[4]
>      >      > +//   - File length - 8 : uint32
>      >      > +//   - File type       : char[4]
>      >      > +//   - Chunks          : chunk[]
>      >      > +//
>      >      >
>      >   
>       +//===----------------------------------------------------------------------===//
>      >      > +#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_RIFF_H
>      >      > +#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_RIFF_H
>      >      > +#include "llvm/ADT/StringRef.h"
>      >      > +#include "llvm/Support/Error.h"
>      >      > +#include "llvm/Support/ScopedPrinter.h"
>      >      > +#include <array>
>      >      > +
>      >      > +namespace clang {
>      >      > +namespace clangd {
>      >      > +namespace riff {
>      >      > +
>      >      > +// A FourCC identifies a chunk in a file, or the type of file
>      >     itself.
>      >      > +using FourCC = std::array<char, 4>;
>      >      > +// Get a FourCC from a string literal, e.g. fourCC("RIFF").
>      >      > +inline constexpr FourCC fourCC(const char (&Literal)[5]) {
>      >      > +  return FourCC{{Literal[0], Literal[1], Literal[2],
>     Literal[3]}};
>      >      > +}
>      >      > +// A chunk is a section in a RIFF container.
>      >      > +struct Chunk {
>      >      > +  FourCC ID;
>      >      > +  llvm::StringRef Data;
>      >      > +};
>      >      > +inline bool operator==(const Chunk &L, const Chunk &R) {
>      >      > +  return std::tie(L.ID, L.Data) == std::tie(R.ID, R.Data);
>      >      > +}
>      >      > +// A File is a RIFF container, which is a typed chunk
>     sequence.
>      >      > +struct File {
>      >      > +  FourCC Type;
>      >      > +  std::vector<Chunk> Chunks;
>      >      > +};
>      >      > +inline bool operator==(const File &L, const File &R) {
>      >      > +  return std::tie(L.Type, L.Chunks) == std::tie(R.Type,
>     R.Chunks);
>      >      > +}
>      >      > +
>      >      > +// Reads a single chunk from the start of Stream.
>      >      > +// Stream is updated to exclude the consumed chunk.
>      >      > +llvm::Expected<Chunk> readChunk(llvm::StringRef &Stream);
>      >      > +
>      >      > +// Serialize a single chunk to OS.
>      >      > +llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
>     const Chunk &);
>      >      > +
>      >      > +// Parses a RIFF file consisting of a single RIFF chunk.
>      >      > +llvm::Expected<File> readFile(llvm::StringRef Stream);
>      >      > +
>      >      > +// Serialize a RIFF file (i.e. a single RIFF chunk) to OS.
>      >      > +llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
>     const File &);
>      >      > +
>      >      > +} // namespace riff
>      >      > +} // namespace clangd
>      >      > +} // namespace clang
>      >      > +#endif
>      >      >
>      >      > Modified:
>      >   
>       clang-tools-extra/trunk/clangd/global-symbol-builder/GlobalSymbolBuilderMain.cpp
>      >      > URL:
>      >
>     http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/global-symbol-builder/GlobalSymbolBuilderMain.cpp?rev=341375&r1=341374&r2=341375&view=diff
>      >      >
>      >   
>       ==============================================================================
>      >      > ---
>      >   
>       clang-tools-extra/trunk/clangd/global-symbol-builder/GlobalSymbolBuilderMain.cpp
>      >     (original)
>      >      > +++
>      >   
>       clang-tools-extra/trunk/clangd/global-symbol-builder/GlobalSymbolBuilderMain.cpp
>      >     Tue Sep  4 09:16:50 2018
>      >      > @@ -7,15 +7,16 @@
>      >      >   //
>      >      >
>      >     
>       //===----------------------------------------------------------------------===//
>      >      >   //
>      >      > -// GlobalSymbolBuilder is a tool to generate YAML-format
>     symbols
>      >     across the
>      >      > -// whole project. This tools is for **experimental**
>     only. Don't
>      >     use it in
>      >      > -// production code.
>      >      > +// GlobalSymbolBuilder is a tool to extract symbols from
>     a whole
>      >     project.
>      >      > +// This tool is **experimental** only. Don't use it in
>      >     production code.
>      >      >   //
>      >      >
>      >     
>       //===----------------------------------------------------------------------===//
>      >      >
>      >      > +#include "RIFF.h"
>      >      >   #include "index/CanonicalIncludes.h"
>      >      >   #include "index/Index.h"
>      >      >   #include "index/Merge.h"
>      >      > +#include "index/Serialization.h"
>      >      >   #include "index/SymbolCollector.h"
>      >      >   #include "index/SymbolYAML.h"
>      >      >   #include "clang/Frontend/CompilerInstance.h"
>      >      > @@ -59,6 +60,14 @@ static llvm::cl::opt<bool> MergeOnTheFly
>      >      >           "MapReduce."),
>      >      >       llvm::cl::init(true), llvm::cl::Hidden);
>      >      >
>      >      > +enum class Format { YAML, Binary };
>      >      > +static llvm::cl::opt<Format>
>      >      > +    Format("format", llvm::cl::desc("Format of the index
>     to be
>      >     written"),
>      >      > +           llvm::cl::values(
>      >      > +               clEnumValN(Format::YAML, "yaml",
>     "human-readable
>      >     YAML format"),
>      >      > +               clEnumValN(Format::Binary, "binary",
>     "binary RIFF
>      >     format")),
>      >      > +           llvm::cl::init(Format::YAML));
>      >      > +
>      >      >   /// Responsible for aggregating symbols from each processed
>      >     file and producing
>      >      >   /// the final results. All methods in this class must be
>      >     thread-safe,
>      >      >   /// 'consumeSymbols' may be called from multiple threads.
>      >      > @@ -210,8 +219,8 @@ int main(int argc, const char **argv) {
>      >      >     llvm::sys::PrintStackTraceOnErrorSignal(argv[0]);
>      >      >
>      >      >     const char *Overview = R"(
>      >      > -  This is an **experimental** tool to generate YAML-format
>      >     project-wide symbols
>      >      > -  for clangd (global code completion). It would be
>     changed and
>      >     deprecated
>      >      > +  This is an **experimental** tool to extract symbols from a
>      >     whole project
>      >      > +  for clangd (global code completion). It will be changed and
>      >     deprecated
>      >      >     eventually. Don't use it in production code!
>      >      >
>      >      >     Example usage for building index for the whole project
>     using
>      >     CMake compile
>      >      > @@ -262,7 +271,16 @@ int main(int argc, const char **argv) {
>      >      >     }
>      >      >     // Reduce phase: combine symbols with the same IDs.
>      >      >     auto UniqueSymbols = Consumer->mergeResults();
>      >      > -  // Output phase: emit YAML for result symbols.
>      >      > -  SymbolsToYAML(UniqueSymbols, llvm::outs());
>      >      > +  // Output phase: emit result symbols.
>      >      > +  switch (clang::clangd::Format) {
>      >      > +  case clang::clangd::Format::YAML:
>      >      > +    SymbolsToYAML(UniqueSymbols, llvm::outs());
>      >      > +    break;
>      >      > +  case clang::clangd::Format::Binary: {
>      >      > +    clang::clangd::IndexFileOut Out;
>      >      > +    Out.Symbols = &UniqueSymbols;
>      >      > +    llvm::outs() << Out;
>      >      > +  }
>      >      > +  }
>      >      >     return 0;
>      >      >   }
>      >      >
>      >      > Modified: clang-tools-extra/trunk/clangd/index/Index.cpp
>      >      > URL:
>      >
>     http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/index/Index.cpp?rev=341375&r1=341374&r2=341375&view=diff
>      >      >
>      >   
>       ==============================================================================
>      >      > --- clang-tools-extra/trunk/clangd/index/Index.cpp (original)
>      >      > +++ clang-tools-extra/trunk/clangd/index/Index.cpp Tue Sep  4
>      >     09:16:50 2018
>      >      > @@ -10,6 +10,7 @@
>      >      >   #include "Index.h"
>      >      >   #include "llvm/ADT/StringExtras.h"
>      >      >   #include "llvm/ADT/StringRef.h"
>      >      > +#include "llvm/Support/Error.h"
>      >      >   #include "llvm/Support/SHA1.h"
>      >      >   #include "llvm/Support/raw_ostream.h"
>      >      >
>      >      > @@ -28,21 +29,20 @@ SymbolID::SymbolID(StringRef USR)
>      >      >       : HashValue(SHA1::hash(arrayRefFromStringRef(USR))) {}
>      >      >
>      >      >   raw_ostream &operator<<(raw_ostream &OS, const SymbolID
>     &ID) {
>      >      > -  OS << toHex(toStringRef(ID.HashValue));
>      >      > -  return OS;
>      >      > +  return OS << toHex(ID.raw());
>      >      >   }
>      >      >
>      >      > -std::string SymbolID::str() const {
>      >      > -  std::string ID;
>      >      > -  llvm::raw_string_ostream OS(ID);
>      >      > -  OS << *this;
>      >      > -  return OS.str();
>      >      > +SymbolID SymbolID::fromRaw(llvm::StringRef Raw) {
>      >      > +  SymbolID ID;
>      >      > +  assert(Raw.size() == RawSize);
>      >      > +  memcpy(ID.HashValue.data(), Raw.data(), RawSize);
>      >      > +  return ID;
>      >      >   }
>      >      >
>      >      > +std::string SymbolID::str() const { return toHex(raw()); }
>      >      > +
>      >      >   void operator>>(StringRef Str, SymbolID &ID) {
>      >      > -  std::string HexString = fromHex(Str);
>      >      > -  assert(HexString.size() == ID.HashValue.size());
>      >      > -  std::copy(HexString.begin(), HexString.end(),
>      >     ID.HashValue.begin());
>      >      > +  ID = SymbolID::fromRaw(fromHex(Str));
>      >      >   }
>      >      >
>      >      >   raw_ostream &operator<<(raw_ostream &OS, SymbolOrigin O) {
>      >      > @@ -78,34 +78,18 @@ SymbolSlab::const_iterator SymbolSlab::f
>      >      >   }
>      >      >
>      >      >   // Copy the underlying data of the symbol into the owned
>     arena.
>      >      > -static void own(Symbol &S, llvm::UniqueStringSaver &Strings,
>      >      > -                BumpPtrAllocator &Arena) {
>      >      > -  // Intern replaces V with a reference to the same
>     string owned
>      >     by the arena.
>      >      > -  auto Intern = [&](StringRef &V) { V = Strings.save(V); };
>      >      > -
>      >      > -  // We need to copy every StringRef field onto the arena.
>      >      > -  Intern(S.Name);
>      >      > -  Intern(S.Scope);
>      >      > -  Intern(S.CanonicalDeclaration.FileURI);
>      >      > -  Intern(S.Definition.FileURI);
>      >      > -
>      >      > -  Intern(S.Signature);
>      >      > -  Intern(S.CompletionSnippetSuffix);
>      >      > -
>      >      > -  Intern(S.Documentation);
>      >      > -  Intern(S.ReturnType);
>      >      > -  for (auto &I : S.IncludeHeaders)
>      >      > -    Intern(I.IncludeHeader);
>      >      > +static void own(Symbol &S, llvm::UniqueStringSaver
>     &Strings) {
>      >      > +  visitStrings(S, [&](StringRef &V) { V =
>     Strings.save(V); });
>      >      >   }
>      >      >
>      >      >   void SymbolSlab::Builder::insert(const Symbol &S) {
>      >      >     auto R = SymbolIndex.try_emplace(S.ID, Symbols.size());
>      >      >     if (R.second) {
>      >      >       Symbols.push_back(S);
>      >      > -    own(Symbols.back(), UniqueStrings, Arena);
>      >      > +    own(Symbols.back(), UniqueStrings);
>      >      >     } else {
>      >      >       auto &Copy = Symbols[R.first->second] = S;
>      >      > -    own(Copy, UniqueStrings, Arena);
>      >      > +    own(Copy, UniqueStrings);
>      >      >     }
>      >      >   }
>      >      >
>      >      > @@ -118,7 +102,7 @@ SymbolSlab SymbolSlab::Builder::build()
>      >      >     BumpPtrAllocator NewArena;
>      >      >     llvm::UniqueStringSaver Strings(NewArena);
>      >      >     for (auto &S : Symbols)
>      >      > -    own(S, Strings, NewArena);
>      >      > +    own(S, Strings);
>      >      >     return SymbolSlab(std::move(NewArena),
>     std::move(Symbols));
>      >      >   }
>      >      >
>      >      >
>      >      > Modified: clang-tools-extra/trunk/clangd/index/Index.h
>      >      > URL:
>      >
>     http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/index/Index.h?rev=341375&r1=341374&r2=341375&view=diff
>      >      >
>      >   
>       ==============================================================================
>      >      > --- clang-tools-extra/trunk/clangd/index/Index.h (original)
>      >      > +++ clang-tools-extra/trunk/clangd/index/Index.h Tue Sep  4
>      >     09:16:50 2018
>      >      > @@ -84,26 +84,28 @@ public:
>      >      >       return HashValue < Sym.HashValue;
>      >      >     }
>      >      >
>      >      > +  constexpr static size_t RawSize = 20;
>      >      > +  llvm::StringRef raw() const {
>      >      > +    return StringRef(reinterpret_cast<const char
>      >     *>(HashValue.data()), RawSize);
>      >      > +  }
>      >      > +  static SymbolID fromRaw(llvm::StringRef);
>      >      >     // Returns a 40-bytes hex encoded string.
>      >      >     std::string str() const;
>      >      >
>      >      >   private:
>      >      > -  static constexpr unsigned HashByteLength = 20;
>      >      > -
>      >      > -  friend llvm::hash_code hash_value(const SymbolID &ID) {
>      >      > -    // We already have a good hash, just return the first
>     bytes.
>      >      > -    static_assert(sizeof(size_t) <= HashByteLength, "size_t
>      >     longer than SHA1!");
>      >      > -    size_t Result;
>      >      > -    memcpy(&Result, ID.HashValue.data(), sizeof(size_t));
>      >      > -    return llvm::hash_code(Result);
>      >      > -  }
>      >      > -  friend llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
>      >      > -                                       const SymbolID &ID);
>      >      >     friend void operator>>(llvm::StringRef Str, SymbolID &ID);
>      >      >
>      >      > -  std::array<uint8_t, HashByteLength> HashValue;
>      >      > +  std::array<uint8_t, RawSize> HashValue;
>      >      >   };
>      >      >
>      >      > +inline llvm::hash_code hash_value(const SymbolID &ID) {
>      >      > +  // We already have a good hash, just return the first
>     bytes.
>      >      > +  assert(sizeof(size_t) <= SymbolID::RawSize && "size_t
>     longer
>      >     than SHA1!");
>      >      > +  size_t Result;
>      >      > +  memcpy(&Result, ID.raw().data(), sizeof(size_t));
>      >      > +  return llvm::hash_code(Result);
>      >      > +}
>      >      > +
>      >      >   // Write SymbolID into the given stream. SymbolID is
>     encoded as
>      >     a 40-bytes
>      >      >   // hex string.
>      >      >   llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const
>      >     SymbolID &ID);
>      >      > @@ -246,6 +248,21 @@ struct Symbol {
>      >      >   };
>      >      >   llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const
>      >     Symbol &S);
>      >      >
>      >      > +// Invokes Callback with each StringRef& contained in the
>     Symbol.
>      >      > +// Useful for deduplicating backing strings.
>      >      > +template <typename Callback> void visitStrings(Symbol &S,
>     const
>      >     Callback &CB) {
>      >      > +  CB(S.Name);
>      >      > +  CB(S.Scope);
>      >      > +  CB(S.CanonicalDeclaration.FileURI);
>      >      > +  CB(S.Definition.FileURI);
>      >      > +  CB(S.Signature);
>      >      > +  CB(S.CompletionSnippetSuffix);
>      >      > +  CB(S.Documentation);
>      >      > +  CB(S.ReturnType);
>      >      > +  for (auto &Include : S.IncludeHeaders)
>      >      > +    CB(Include.IncludeHeader);
>      >      > +}
>      >      > +
>      >      >   // Computes query-independent quality score for a Symbol.
>      >      >   // This currently falls in the range [1, ln(#indexed
>     documents)].
>      >      >   // FIXME: this should probably be split into symbol ->
>     signals
>      >      >
>      >      > Added: clang-tools-extra/trunk/clangd/index/Serialization.cpp
>      >      > URL:
>      >
>     http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/index/Serialization.cpp?rev=341375&view=auto
>      >      >
>      >   
>       ==============================================================================
>      >      > --- clang-tools-extra/trunk/clangd/index/Serialization.cpp
>     (added)
>      >      > +++ clang-tools-extra/trunk/clangd/index/Serialization.cpp Tue
>      >     Sep  4 09:16:50 2018
>      >      > @@ -0,0 +1,366 @@
>      >      > +//===-- Serialization.cpp - Binary serialization of index
>     data
>      >     ------------===//
>      >      > +//
>      >      > +//                     The LLVM Compiler Infrastructure
>      >      > +//
>      >      > +// This file is distributed under the University of Illinois
>      >     Open Source
>      >      > +// License. See LICENSE.TXT for details.
>      >      > +//
>      >      >
>      >   
>       +//===----------------------------------------------------------------------===//
>      >      > +#include "Serialization.h"
>      >      > +#include "../RIFF.h"
>      >      > +#include "llvm/Support/Compression.h"
>      >      > +#include "llvm/Support/Endian.h"
>      >      > +#include "llvm/Support/Error.h"
>      >      > +
>      >      > +using namespace llvm;
>      >      > +namespace clang {
>      >      > +namespace clangd {
>      >      > +namespace {
>      >      > +Error makeError(const Twine &Msg) {
>      >      > +  return make_error<StringError>(Msg,
>     inconvertibleErrorCode());
>      >      > +}
>      >      > +
>      >      > +// IO PRIMITIVES
>      >      > +// We use little-endian 32 bit ints, sometimes with
>      >     variable-length encoding.
>      >      > +
>      >      > +StringRef consume(StringRef &Data, int N) {
>      >      > +  StringRef Ret = Data.take_front(N);
>      >      > +  Data = Data.drop_front(N);
>      >      > +  return Ret;
>      >      > +}
>      >      > +
>      >      > +uint8_t consume8(StringRef &Data) {
>      >      > +  uint8_t Ret = Data.front();
>      >      > +  Data = Data.drop_front();
>      >      > +  return Ret;
>      >      > +}
>      >      > +
>      >      > +uint32_t consume32(StringRef &Data) {
>      >      > +  auto Ret = support::endian::read32le(Data.bytes_begin());
>      >      > +  Data = Data.drop_front(4);
>      >      > +  return Ret;
>      >      > +}
>      >      > +
>      >      > +void write32(uint32_t I, raw_ostream &OS) {
>      >      > +  char buf[4];
>      >      > +  support::endian::write32le(buf, I);
>      >      > +  OS.write(buf, sizeof(buf));
>      >      > +}
>      >      > +
>      >      > +// Variable-length int encoding (varint) uses the bottom
>     7 bits
>      >     of each byte
>      >      > +// to encode the number, and the top bit to indicate whether
>      >     more bytes follow.
>      >      > +// e.g. 9a 2f means [0x1a and keep reading, 0x2f and stop].
>      >      > +// This represents 0x1a | 0x2f<<7 = 6042.
>      >      > +// A 32-bit integer takes 1-5 bytes to encode; small
>     numbers are
>      >     more compact.
>      >      > +void writeVar(uint32_t I, raw_ostream &OS) {
>      >      > +  constexpr static uint8_t More = 1 << 7;
>      >      > +  if (LLVM_LIKELY(I < 1 << 7)) {
>      >      > +    OS.write(I);
>      >      > +    return;
>      >      > +  }
>      >      > +  for (;;) {
>      >      > +    OS.write(I | More);
>      >      > +    I >>= 7;
>      >      > +    if (I < 1 << 7) {
>      >      > +      OS.write(I);
>      >      > +      return;
>      >      > +    }
>      >      > +  }
>      >      > +}
>      >      > +
>      >      > +uint32_t consumeVar(StringRef &Data) {
>      >      > +  constexpr static uint8_t More = 1 << 7;
>      >      > +  uint8_t B = consume8(Data);
>      >      > +  if (LLVM_LIKELY(!(B & More)))
>      >      > +    return B;
>      >      > +  uint32_t Val = B & ~More;
>      >      > +  for (int Shift = 7; B & More && Shift < 32; Shift += 7) {
>      >      > +    B = consume8(Data);
>      >      > +    Val |= (B & ~More) << Shift;
>      >      > +  }
>      >      > +  return Val;
>      >      > +}
>      >      > +
>      >      > +// STRING TABLE ENCODING
>      >      > +// Index data has many string fields, and many strings are
>      >     identical.
>      >      > +// We store each string once, and refer to them by index.
>      >      > +//
>      >      > +// The string table's format is:
>      >      > +//   - UncompressedSize : uint32
>      >      > +//   - CompressedData   : byte[CompressedSize]
>      >      > +//
>      >      > +// CompressedData is a zlib-compressed
>     byte[UncompressedSize].
>      >      > +// It contains a sequence of null-terminated strings, e.g.
>      >     "foo\0bar\0".
>      >      > +// These are sorted to improve compression.
>      >      > +
>      >      > +// Maps each string to a canonical representation.
>      >      > +// Strings remain owned externally (e.g. by SymbolSlab).
>      >      > +class StringTableOut {
>      >      > +  DenseSet<StringRef> Unique;
>      >      > +  std::vector<StringRef> Sorted;
>      >      > +  // Since strings are interned, look up can be by pointer.
>      >      > +  DenseMap<std::pair<const char *, size_t>, unsigned> Index;
>      >      > +
>      >      > +public:
>      >      > +  // Add a string to the table. Overwrites S if an identical
>      >     string exists.
>      >      > +  void intern(StringRef &S) { S = *Unique.insert(S).first; };
>      >      > +  // Finalize the table and write it to OS. No more
>     strings may
>      >     be added.
>      >      > +  void finalize(raw_ostream &OS) {
>      >      > +    Sorted = {Unique.begin(), Unique.end()};
>      >      > +    std::sort(Sorted.begin(), Sorted.end());
>      >      > +    for (unsigned I = 0; I < Sorted.size(); ++I)
>      >      > +      Index.try_emplace({Sorted[I].data(),
>     Sorted[I].size()}, I);
>      >      > +
>      >      > +    std::string RawTable;
>      >      > +    for (StringRef S : Sorted) {
>      >      > +      RawTable.append(S);
>      >      > +      RawTable.push_back(0);
>      >      > +    }
>      >      > +    SmallString<1> Compressed;
>      >      > +    cantFail(zlib::compress(RawTable, Compressed));
>      >      > +    write32(RawTable.size(), OS);
>      >      > +    OS << Compressed;
>      >      > +  }
>      >      > +  // Get the ID of a string, which must be interned.
>     Table must
>      >     be finalized.
>      >      > +  unsigned index(StringRef S) const {
>      >      > +    assert(!Sorted.empty() && "table not finalized");
>      >      > +    assert(Index.count({S.data(), S.size()}) && "string not
>      >     interned");
>      >      > +    return Index.find({S.data(), S.size()})->second;
>      >      > +  }
>      >      > +};
>      >      > +
>      >      > +struct StringTableIn {
>      >      > +  BumpPtrAllocator Arena;
>      >      > +  std::vector<StringRef> Strings;
>      >      > +};
>      >      > +
>      >      > +Expected<StringTableIn> readStringTable(StringRef Data) {
>      >      > +  if (Data.size() < 4)
>      >      > +    return makeError("Bad string table: not enough
>     metadata");
>      >      > +  size_t UncompressedSize = consume32(Data);
>      >      > +  SmallString<1> Uncompressed;
>      >      > +  if (Error E = llvm::zlib::uncompress(Data, Uncompressed,
>      >     UncompressedSize))
>      >      > +    return std::move(E);
>      >      > +
>      >      > +  StringTableIn Table;
>      >      > +  StringSaver Saver(Table.Arena);
>      >      > +  for (StringRef Rest = Uncompressed; !Rest.empty();) {
>      >      > +    auto Len = Rest.find(0);
>      >      > +    if (Len == StringRef::npos)
>      >      > +      return makeError("Bad string table: not null
>     terminated");
>      >      > +    Table.Strings.push_back(Saver.save(consume(Rest, Len)));
>      >      > +    Rest = Rest.drop_front();
>      >      > +  }
>      >      > +  return Table;
>      >      > +}
>      >      > +
>      >      > +// SYMBOL ENCODING
>      >      > +// Each field of clangd::Symbol is encoded in turn (see
>      >     implementation).
>      >      > +//  - StringRef fields encode as varint (index into the
>     string
>      >     table)
>      >      > +//  - enums encode as the underlying type
>      >      > +//  - most numbers encode as varint
>      >      > +
>      >      > +// It's useful to the implementation to assume symbols have a
>      >     bounded size.
>      >      > +constexpr size_t SymbolSizeBound = 512;
>      >      > +// To ensure the bounded size, restrict the number of include
>      >     headers stored.
>      >      > +constexpr unsigned MaxIncludes = 50;
>      >      > +
>      >      > +void writeSymbol(const Symbol &Sym, const StringTableOut
>     &Strings,
>      >      > +                 raw_ostream &OS) {
>      >      > +  auto StartOffset = OS.tell();
>      >      > +  OS << Sym.ID.raw(); // TODO: once we start writing
>     xrefs and
>      >     posting lists,
>      >      > +                      // symbol IDs should probably be in a
>      >     string table.
>      >      > +  OS.write(static_cast<uint8_t>(Sym.SymInfo.Kind));
>      >      > +  OS.write(static_cast<uint8_t>(Sym.SymInfo.Lang));
>      >      > +  writeVar(Strings.index(Sym.Name), OS);
>      >      > +  writeVar(Strings.index(Sym.Scope), OS);
>      >      > +  for (const auto &Loc : {Sym.Definition,
>      >     Sym.CanonicalDeclaration}) {
>      >      > +    writeVar(Strings.index(Loc.FileURI), OS);
>      >      > +    for (const auto &Endpoint : {Loc.Start, Loc.End}) {
>      >      > +      writeVar(Endpoint.Line, OS);
>      >      > +      writeVar(Endpoint.Column, OS);
>      >      > +    }
>      >      > +  }
>      >      > +  writeVar(Sym.References, OS);
>      >      > +  OS.write(Sym.IsIndexedForCodeCompletion);
>      >      > +  OS.write(static_cast<uint8_t>(Sym.Origin));
>      >      > +  writeVar(Strings.index(Sym.Signature), OS);
>      >      > +  writeVar(Strings.index(Sym.CompletionSnippetSuffix), OS);
>      >      > +  writeVar(Strings.index(Sym.Documentation), OS);
>      >      > +  writeVar(Strings.index(Sym.ReturnType), OS);
>      >      > +
>      >      > +  auto WriteInclude = [&](const
>      >     Symbol::IncludeHeaderWithReferences &Include) {
>      >      > +    writeVar(Strings.index(Include.IncludeHeader), OS);
>      >      > +    writeVar(Include.References, OS);
>      >      > +  };
>      >      > +  // There are almost certainly few includes, so we can just
>      >     write them.
>      >      > +  if (LLVM_LIKELY(Sym.IncludeHeaders.size() <=
>     MaxIncludes)) {
>      >      > +    writeVar(Sym.IncludeHeaders.size(), OS);
>      >      > +    for (const auto &Include : Sym.IncludeHeaders)
>      >      > +      WriteInclude(Include);
>      >      > +  } else {
>      >      > +    // If there are too many, make sure we truncate the least
>      >     important.
>      >      > +    using Pointer = const
>     Symbol::IncludeHeaderWithReferences *;
>      >      > +    std::vector<Pointer> Pointers;
>      >      > +    for (const auto &Include : Sym.IncludeHeaders)
>      >      > +      Pointers.push_back(&Include);
>      >      > +    std::sort(Pointers.begin(), Pointers.end(), [](Pointer L,
>      >     Pointer R) {
>      >      > +      return L->References > R->References;
>      >      > +    });
>      >      > +    Pointers.resize(MaxIncludes);
>      >      > +
>      >      > +    writeVar(MaxIncludes, OS);
>      >      > +    for (Pointer P : Pointers)
>      >      > +      WriteInclude(*P);
>      >      > +  }
>      >      > +
>      >      > +  assert(OS.tell() - StartOffset < SymbolSizeBound && "Symbol
>      >     length unsafe!");
>      >      > +  (void)StartOffset; // Unused in NDEBUG;
>      >      > +}
>      >      > +
>      >      > +Expected<Symbol> readSymbol(StringRef &Data, const
>     StringTableIn
>      >     &Strings) {
>      >      > +  // Usually we can skip bounds checks because the buffer
>     is huge.
>      >      > +  // Near the end of the buffer, this would be unsafe. In
>     this
>      >     rare case, copy
>      >      > +  // the data into a bigger buffer so we can again skip
>     the checks.
>      >      > +  if (LLVM_UNLIKELY(Data.size() < SymbolSizeBound)) {
>      >      > +    std::string Buf(Data);
>      >      > +    Buf.resize(SymbolSizeBound);
>      >      > +    StringRef ExtendedData = Buf;
>      >      > +    auto Ret = readSymbol(ExtendedData, Strings);
>      >      > +    unsigned BytesRead = Buf.size() - ExtendedData.size();
>      >      > +    if (BytesRead > Data.size())
>      >      > +      return makeError("read past end of data");
>      >      > +    Data = Data.drop_front(BytesRead);
>      >      > +    return Ret;
>      >      > +  }
>      >      > +
>      >      > +#define READ_STRING(Field)                                                     \
>      >      > +  do {                                                                         \
>      >      > +    auto StringIndex = consumeVar(Data);                                       \
>      >      > +    if (LLVM_UNLIKELY(StringIndex >= Strings.Strings.size()))                  \
>      >      > +      return makeError("Bad string index");                                    \
>      >      > +    Field = Strings.Strings[StringIndex];                                      \
>      >      > +  } while (0)
>      >      > +
>      >      > +  Symbol Sym;
>      >      > +  Sym.ID = SymbolID::fromRaw(consume(Data, 20));
>      >      > +  Sym.SymInfo.Kind =
>     static_cast<index::SymbolKind>(consume8(Data));
>      >      > +  Sym.SymInfo.Lang =
>      >     static_cast<index::SymbolLanguage>(consume8(Data));
>      >      > +  READ_STRING(Sym.Name);
>      >      > +  READ_STRING(Sym.Scope);
>      >      > +  for (SymbolLocation *Loc : {&Sym.Definition,
>      >     &Sym.CanonicalDeclaration}) {
>      >      > +    READ_STRING(Loc->FileURI);
>      >      > +    for (auto &Endpoint : {&Loc->Start, &Loc->End}) {
>      >      > +      Endpoint->Line = consumeVar(Data);
>      >      > +      Endpoint->Column = consumeVar(Data);
>      >      > +    }
>      >      > +  }
>      >      > +  Sym.References = consumeVar(Data);
>      >      > +  Sym.IsIndexedForCodeCompletion = consume8(Data);
>      >      > +  Sym.Origin = static_cast<SymbolOrigin>(consume8(Data));
>      >      > +  READ_STRING(Sym.Signature);
>      >      > +  READ_STRING(Sym.CompletionSnippetSuffix);
>      >      > +  READ_STRING(Sym.Documentation);
>      >      > +  READ_STRING(Sym.ReturnType);
>      >      > +  unsigned IncludeHeaderN = consumeVar(Data);
>      >      > +  if (IncludeHeaderN > MaxIncludes)
>      >      > +    return makeError("too many IncludeHeaders");
>      >      > +  Sym.IncludeHeaders.resize(IncludeHeaderN);
>      >      > +  for (auto &I : Sym.IncludeHeaders) {
>      >      > +    READ_STRING(I.IncludeHeader);
>      >      > +    I.References = consumeVar(Data);
>      >      > +  }
>      >      > +
>      >      > +#undef READ_STRING
>      >      > +  return Sym;
>      >      > +}
>      >      > +
>      >      > +} // namespace
>      >      > +
>      >      > +// FILE ENCODING
>      >      > +// A file is a RIFF chunk with type 'CdIx'.
>      >      > +// It contains the sections:
>      >      > +//   - meta: version number
>      >      > +//   - stri: string table
>      >      > +//   - symb: symbols
>      >      > +
>      >      > +// The current versioning scheme is simple - non-current
>      >     versions are rejected.
>      >      > +// This allows arbitrary format changes, which invalidate
>     stored
>      >     data.
>      >      > +// Later we may want to support some backward compatibility.
>      >      > +constexpr static uint32_t Version = 1;
>      >      > +
>      >      > +Expected<IndexFileIn> readIndexFile(StringRef Data) {
>      >      > +  auto RIFF = riff::readFile(Data);
>      >      > +  if (!RIFF)
>      >      > +    return RIFF.takeError();
>      >      > +  if (RIFF->Type != riff::fourCC("CdIx"))
>      >      > +    return makeError("wrong RIFF type");
>      >      > +  StringMap<StringRef> Chunks;
>      >      > +  for (const auto &Chunk : RIFF->Chunks)
>      >      > +    Chunks.try_emplace(StringRef(Chunk.ID.data(),
>      >     Chunk.ID.size()), Chunk.Data);
>      >      > +
>      >      > +  for (StringRef RequiredChunk : {"meta", "stri"})
>      >      > +    if (!Chunks.count(RequiredChunk))
>      >      > +      return makeError("missing required chunk " +
>     RequiredChunk);
>      >      > +
>      >      > +  StringRef Meta = Chunks.lookup("meta");
>      >      > +  if (Meta.size() < 4 || consume32(Meta) != Version)
>      >      > +    return makeError("wrong version");
>      >      > +
>      >      > +  auto Strings = readStringTable(Chunks.lookup("stri"));
>      >      > +  if (!Strings)
>      >      > +    return Strings.takeError();
>      >      > +
>      >      > +  IndexFileIn Result;
>      >      > +  if (Chunks.count("symb")) {
>      >      > +    StringRef SymbolData = Chunks.lookup("symb");
>      >      > +    SymbolSlab::Builder Symbols;
>      >      > +    while (!SymbolData.empty())
>      >      > +      if (auto Sym = readSymbol(SymbolData, *Strings))
>      >      > +        Symbols.insert(*Sym);
>      >      > +      else
>      >      > +        return Sym.takeError();
>      >      > +    Result.Symbols = std::move(Symbols).build();
>      >      > +  }
>      >      > +  return Result;
>      >      > +}
>      >      > +
>      >      > +raw_ostream &operator<<(raw_ostream &OS, const
>     IndexFileOut &Data) {
>      >      > +  assert(Data.Symbols && "An index file without symbols
>     makes no
>      >     sense!");
>      >      > +  riff::File RIFF;
>      >      > +  RIFF.Type = riff::fourCC("CdIx");
>      >      > +
>      >      > +  SmallString<4> Meta;
>      >      > +  {
>      >      > +    raw_svector_ostream MetaOS(Meta);
>      >      > +    write32(Version, MetaOS);
>      >      > +  }
>      >      > +  RIFF.Chunks.push_back({riff::fourCC("meta"), Meta});
>      >      > +
>      >      > +  StringTableOut Strings;
>      >      > +  std::vector<Symbol> Symbols;
>      >      > +  for (const auto &Sym : *Data.Symbols) {
>      >      > +    Symbols.emplace_back(Sym);
>      >      > +    visitStrings(Symbols.back(), [&](StringRef &S) {
>      >     Strings.intern(S); });
>      >      > +  }
>      >      > +
>      >      > +  std::string StringSection;
>      >      > +  {
>      >      > +    raw_string_ostream StringOS(StringSection);
>      >      > +    Strings.finalize(StringOS);
>      >      > +  }
>      >      > +  RIFF.Chunks.push_back({riff::fourCC("stri"),
>     StringSection});
>      >      > +
>      >      > +  std::string SymbolSection;
>      >      > +  {
>      >      > +    raw_string_ostream SymbolOS(SymbolSection);
>      >      > +    for (const auto &Sym : Symbols)
>      >      > +      writeSymbol(Sym, Strings, SymbolOS);
>      >      > +  }
>      >      > +  RIFF.Chunks.push_back({riff::fourCC("symb"),
>     SymbolSection});
>      >      > +
>      >      > +  return OS << RIFF;
>      >      > +}
>      >      > +
>      >      > +} // namespace clangd
>      >      > +} // namespace clang
>      >      >
>      >      > Added: clang-tools-extra/trunk/clangd/index/Serialization.h
>      >      > URL:
>      >
>     http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/index/Serialization.h?rev=341375&view=auto
>      >      >
>      >   
>       ==============================================================================
>      >      > --- clang-tools-extra/trunk/clangd/index/Serialization.h
>     (added)
>      >      > +++ clang-tools-extra/trunk/clangd/index/Serialization.h
>     Tue Sep
>      >     4 09:16:50 2018
>      >      > @@ -0,0 +1,48 @@
>      >      > +//===--- Serialization.h - Binary serialization of index data
>      >     ----*- C++-*-===//
>      >      > +//
>      >      > +//                     The LLVM Compiler Infrastructure
>      >      > +//
>      >      > +// This file is distributed under the University of Illinois
>      >     Open Source
>      >      > +// License. See LICENSE.TXT for details.
>      >      > +//
>      >      >
>      >   
>       +//===----------------------------------------------------------------------===//
>      >      > +//
>      >      > +// This file provides a compact binary serialization of indexed symbols.
>      >      > +//
>      >      > +// It writes two sections:
>      >      > +//  - a string table (which is compressed)
>      >      > +//  - lists of encoded symbols
>      >      > +//
>      >      > +// The format has a simple versioning scheme: the version is embedded in the
>      >      > +// data and non-current versions are rejected when reading.
>      >      > +//
>      >      >
>      >   
>       +//===----------------------------------------------------------------------===//
>      >      > +
>      >      > +#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_RIFF_H
>      >      > +#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_RIFF_H
>      >      > +#include "Index.h"
>      >      > +#include "llvm/Support/Error.h"
>      >      > +
>      >      > +namespace clang {
>      >      > +namespace clangd {
>      >      > +
>      >      > +// Specifies the contents of an index file to be written.
>      >      > +struct IndexFileOut {
>      >      > +  const SymbolSlab *Symbols;
>      >      > +  // TODO: Support serializing symbol occurrences.
>      >      > +  // TODO: Support serializing Dex posting lists.
>      >      > +};
>      >      > +// Serializes an index file. (This is a RIFF container chunk).
>      >      > +llvm::raw_ostream &operator<<(llvm::raw_ostream &, const
>      >     IndexFileOut &);
>      >      > +
>      >      > +// Holds the contents of an index file that was read.
>      >      > +struct IndexFileIn {
>      >      > +  llvm::Optional<SymbolSlab> Symbols;
>      >      > +};
>      >      > +// Parse an index file. The input must be a RIFF container chunk.
>      >      > +llvm::Expected<IndexFileIn> readIndexFile(llvm::StringRef);
>      >      > +
>      >      > +} // namespace clangd
>      >      > +} // namespace clang
>      >      > +
>      >      > +#endif
>      >      >
>      >      > Modified: clang-tools-extra/trunk/clangd/index/SymbolYAML.cpp
>      >      > URL:
>      >
>     http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/index/SymbolYAML.cpp?rev=341375&r1=341374&r2=341375&view=diff
>      >      >
>      >   
>       ==============================================================================
>      >      > --- clang-tools-extra/trunk/clangd/index/SymbolYAML.cpp
>     (original)
>      >      > +++ clang-tools-extra/trunk/clangd/index/SymbolYAML.cpp
>     Tue Sep
>      >     4 09:16:50 2018
>      >      > @@ -9,6 +9,7 @@
>      >      >
>      >      >   #include "SymbolYAML.h"
>      >      >   #include "Index.h"
>      >      > +#include "Serialization.h"
>      >      >   #include "dex/DexIndex.h"
>      >      >   #include "llvm/ADT/Optional.h"
>      >      >   #include "llvm/ADT/SmallVector.h"
>      >      > @@ -189,10 +190,20 @@ std::unique_ptr<SymbolIndex> loadIndex(l
>      >      >       llvm::errs() << "Can't open " << SymbolFile << "\n";
>      >      >       return nullptr;
>      >      >     }
>      >      > -  auto Slab = symbolsFromYAML(Buffer.get()->getBuffer());
>      >      > +  StringRef Data = Buffer->get()->getBuffer();
>      >      >
>      >      > -  return UseDex ? dex::DexIndex::build(std::move(Slab))
>      >      > -                : MemIndex::build(std::move(Slab),
>     RefSlab());
>      >      > +  llvm::Optional<SymbolSlab> Slab;
>      >      > +  if (Data.startswith("RIFF")) { // Magic for binary index file.
>      >      > +    if (auto RIFF = readIndexFile(Data))
>      >      > +      Slab = std::move(RIFF->Symbols);
>      >      > +    else
>      >      > +      llvm::errs() << "Bad RIFF: " <<
>      > 
> 



More information about the cfe-commits mailing list