[clang-tools-extra] r347669 - [clangd] Put direct headers into srcs section.

Kadir Cetinkaya via cfe-commits cfe-commits at lists.llvm.org
Tue Nov 27 08:08:53 PST 2018


Author: kadircet
Date: Tue Nov 27 08:08:53 2018
New Revision: 347669

URL: http://llvm.org/viewvc/llvm-project?rev=347669&view=rev
Log:
[clangd] Put direct headers into srcs section.

Summary:
Currently, there's no way of knowing about header files
using the compilation database, since it doesn't contain header files as entries.

Using this information, restoring from cache using compile commands becomes
possible instead of doing directory traversal. Also, we can issue indexing
actions for out-of-date headers even if source files depending on them haven't
changed.

Reviewers: sammccall

Subscribers: ilya-biryukov, ioeric, MaskRay, jkorous, arphaman, cfe-commits

Differential Revision: https://reviews.llvm.org/D54817

Modified:
    clang-tools-extra/trunk/clangd/Headers.h
    clang-tools-extra/trunk/clangd/SourceCode.cpp
    clang-tools-extra/trunk/clangd/SourceCode.h
    clang-tools-extra/trunk/clangd/index/Background.cpp
    clang-tools-extra/trunk/clangd/index/Background.h
    clang-tools-extra/trunk/clangd/index/Serialization.cpp
    clang-tools-extra/trunk/clangd/index/Serialization.h
    clang-tools-extra/trunk/unittests/clangd/BackgroundIndexTests.cpp
    clang-tools-extra/trunk/unittests/clangd/SerializationTests.cpp

Modified: clang-tools-extra/trunk/clangd/Headers.h
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/Headers.h?rev=347669&r1=347668&r2=347669&view=diff
==============================================================================
--- clang-tools-extra/trunk/clangd/Headers.h (original)
+++ clang-tools-extra/trunk/clangd/Headers.h Tue Nov 27 08:08:53 2018
@@ -48,6 +48,20 @@ struct Inclusion {
 };
 llvm::raw_ostream &operator<<(llvm::raw_ostream &, const Inclusion&);
 
+// Contains information about one file in the build graph and its direct
+// dependencies. Doesn't own the strings it references (IncludeGraph is
+// self-contained).
+struct IncludeGraphNode {
+  // True if current file is a main file rather than a header.
+  bool IsTU;
+  llvm::StringRef URI;
+  FileDigest Digest;
+  std::vector<llvm::StringRef> DirectIncludes;
+};
+// URI and DirectIncludes are references to keys of the map containing
+// them.
+using IncludeGraph = llvm::StringMap<IncludeGraphNode>;
+
 // Information captured about the inclusion graph in a translation unit.
 // This includes detailed information about the direct #includes, and summary
 // information about all transitive includes.

Modified: clang-tools-extra/trunk/clangd/SourceCode.cpp
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/SourceCode.cpp?rev=347669&r1=347668&r2=347669&view=diff
==============================================================================
--- clang-tools-extra/trunk/clangd/SourceCode.cpp (original)
+++ clang-tools-extra/trunk/clangd/SourceCode.cpp Tue Nov 27 08:08:53 2018
@@ -227,5 +227,17 @@ bool IsRangeConsecutive(const Range &Lef
          Left.end.character == Right.start.character;
 }
 
+FileDigest digest(StringRef Content) {
+  return llvm::SHA1::hash({(const uint8_t *)Content.data(), Content.size()});
+}
+
+Optional<FileDigest> digestFile(const SourceManager &SM, FileID FID) {
+  bool Invalid = false;
+  StringRef Content = SM.getBufferData(FID, &Invalid);
+  if (Invalid)
+    return None;
+  return digest(Content);
+}
+
 } // namespace clangd
 } // namespace clang

Modified: clang-tools-extra/trunk/clangd/SourceCode.h
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/SourceCode.h?rev=347669&r1=347668&r2=347669&view=diff
==============================================================================
--- clang-tools-extra/trunk/clangd/SourceCode.h (original)
+++ clang-tools-extra/trunk/clangd/SourceCode.h Tue Nov 27 08:08:53 2018
@@ -16,13 +16,22 @@
 #include "Protocol.h"
 #include "clang/Basic/Diagnostic.h"
 #include "clang/Basic/SourceLocation.h"
+#include "clang/Basic/SourceManager.h"
 #include "clang/Tooling/Core/Replacement.h"
+#include "llvm/Support/SHA1.h"
 
 namespace clang {
 class SourceManager;
 
 namespace clangd {
 
+// We tend to generate digests for source codes in a lot of different places.
+// This represents the type for those digests to prevent us hard coding details
+// of hashing function at every place that needs to store this information.
+using FileDigest = decltype(llvm::SHA1::hash({}));
+FileDigest digest(StringRef Content);
+Optional<FileDigest> digestFile(const SourceManager &SM, FileID FID);
+
 // Counts the number of UTF-16 code units needed to represent a string (LSP
 // specifies string lengths in UTF-16 code units).
 size_t lspLength(StringRef Code);

Modified: clang-tools-extra/trunk/clangd/index/Background.cpp
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/index/Background.cpp?rev=347669&r1=347668&r2=347669&view=diff
==============================================================================
--- clang-tools-extra/trunk/clangd/index/Background.cpp (original)
+++ clang-tools-extra/trunk/clangd/index/Background.cpp Tue Nov 27 08:08:53 2018
@@ -11,6 +11,7 @@
 #include "ClangdUnit.h"
 #include "Compiler.h"
 #include "Logger.h"
+#include "SourceCode.h"
 #include "Threading.h"
 #include "Trace.h"
 #include "URI.h"
@@ -149,19 +150,6 @@ void BackgroundIndex::enqueueTask(Task T
   QueueCV.notify_all();
 }
 
-static BackgroundIndex::FileDigest digest(StringRef Content) {
-  return SHA1::hash({(const uint8_t *)Content.data(), Content.size()});
-}
-
-static Optional<BackgroundIndex::FileDigest> digestFile(const SourceManager &SM,
-                                                        FileID FID) {
-  bool Invalid = false;
-  StringRef Content = SM.getBufferData(FID, &Invalid);
-  if (Invalid)
-    return None;
-  return digest(Content);
-}
-
 // Resolves URI to file paths with cache.
 class URIToFileCache {
 public:
@@ -193,8 +181,7 @@ private:
 };
 
 /// Given index results from a TU, only update files in \p FilesToUpdate.
-void BackgroundIndex::update(StringRef MainFile, SymbolSlab Symbols,
-                             RefSlab Refs,
+void BackgroundIndex::update(StringRef MainFile, IndexFileIn Index,
                              const StringMap<FileDigest> &FilesToUpdate,
                              BackgroundIndexStorage *IndexStorage) {
   // Partition symbols/references into files.
@@ -204,7 +191,7 @@ void BackgroundIndex::update(StringRef M
   };
   StringMap<File> Files;
   URIToFileCache URICache(MainFile);
-  for (const auto &Sym : Symbols) {
+  for (const auto &Sym : *Index.Symbols) {
     if (Sym.CanonicalDeclaration) {
       auto DeclPath = URICache.resolve(Sym.CanonicalDeclaration.FileURI);
       if (FilesToUpdate.count(DeclPath) != 0)
@@ -222,7 +209,7 @@ void BackgroundIndex::update(StringRef M
     }
   }
   DenseMap<const Ref *, SymbolID> RefToIDs;
-  for (const auto &SymRefs : Refs) {
+  for (const auto &SymRefs : *Index.Refs) {
     for (const auto &R : SymRefs.second) {
       auto Path = URICache.resolve(R.Location.FileURI);
       if (FilesToUpdate.count(Path) != 0) {
@@ -250,12 +237,11 @@ void BackgroundIndex::update(StringRef M
     auto Hash = FilesToUpdate.lookup(Path);
     // We need to store shards before updating the index, since the latter
     // consumes slabs.
-    // FIXME: Store Hash in the Shard.
     if (IndexStorage) {
       IndexFileOut Shard;
       Shard.Symbols = SS.get();
       Shard.Refs = RS.get();
-      Shard.Digest = &Hash;
+
       if (auto Error = IndexStorage->storeShard(Path, Shard))
         elog("Failed to write background-index shard for file {0}: {1}", Path,
              std::move(Error));
@@ -275,8 +261,8 @@ void BackgroundIndex::update(StringRef M
 // \p FileDigests contains file digests for the current indexed files, and all
 // changed files will be added to \p FilesToUpdate.
 decltype(SymbolCollector::Options::FileFilter) createFileFilter(
-    const llvm::StringMap<BackgroundIndex::FileDigest> &FileDigests,
-    llvm::StringMap<BackgroundIndex::FileDigest> &FilesToUpdate) {
+    const llvm::StringMap<FileDigest> &FileDigests,
+    llvm::StringMap<FileDigest> &FilesToUpdate) {
   return [&FileDigests, &FilesToUpdate](const SourceManager &SM, FileID FID) {
     StringRef Path;
     if (const auto *F = SM.getFileEntryForID(FID))
@@ -375,8 +361,11 @@ Error BackgroundIndex::index(tooling::Co
       Symbols.size(), Refs.numRefs());
   SPAN_ATTACH(Tracer, "symbols", int(Symbols.size()));
   SPAN_ATTACH(Tracer, "refs", int(Refs.numRefs()));
-  update(AbsolutePath, std::move(Symbols), std::move(Refs), FilesToUpdate,
-         IndexStorage);
+  IndexFileIn Index;
+  Index.Symbols = std::move(Symbols);
+  Index.Refs = std::move(Refs);
+
+  update(AbsolutePath, std::move(Index), FilesToUpdate, IndexStorage);
   {
     // Make sure hash for the main file is always updated even if there is no
     // index data in it.

Modified: clang-tools-extra/trunk/clangd/index/Background.h
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/index/Background.h?rev=347669&r1=347668&r2=347669&view=diff
==============================================================================
--- clang-tools-extra/trunk/clangd/index/Background.h (original)
+++ clang-tools-extra/trunk/clangd/index/Background.h Tue Nov 27 08:08:53 2018
@@ -84,11 +84,10 @@ public:
   LLVM_NODISCARD bool
   blockUntilIdleForTest(llvm::Optional<double> TimeoutSeconds = 10);
 
-  using FileDigest = decltype(llvm::SHA1::hash({}));
-
 private:
   /// Given index results from a TU, only update files in \p FilesToUpdate.
-  void update(llvm::StringRef MainFile, SymbolSlab Symbols, RefSlab Refs,
+  /// Also stores new index information on IndexStorage.
+  void update(llvm::StringRef MainFile, IndexFileIn Index,
               const llvm::StringMap<FileDigest> &FilesToUpdate,
               BackgroundIndexStorage *IndexStorage);
 

Modified: clang-tools-extra/trunk/clangd/index/Serialization.cpp
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/index/Serialization.cpp?rev=347669&r1=347668&r2=347669&view=diff
==============================================================================
--- clang-tools-extra/trunk/clangd/index/Serialization.cpp (original)
+++ clang-tools-extra/trunk/clangd/index/Serialization.cpp Tue Nov 27 08:08:53 2018
@@ -247,6 +247,31 @@ SymbolLocation readLocation(Reader &Data
   return Loc;
 }
 
+IncludeGraphNode readIncludeGraphNode(Reader &Data,
+                                      llvm::ArrayRef<llvm::StringRef> Strings) {
+  IncludeGraphNode IGN;
+  IGN.IsTU = Data.consume8();
+  IGN.URI = Data.consumeString(Strings);
+  llvm::StringRef Digest = Data.consume(IGN.Digest.size());
+  std::copy(Digest.bytes_begin(), Digest.bytes_end(), IGN.Digest.begin());
+  IGN.DirectIncludes.resize(Data.consumeVar());
+  for (llvm::StringRef &Include : IGN.DirectIncludes)
+    Include = Data.consumeString(Strings);
+  return IGN;
+}
+
+void writeIncludeGraphNode(const IncludeGraphNode &IGN,
+                           const StringTableOut &Strings, raw_ostream &OS) {
+  OS.write(IGN.IsTU);
+  writeVar(Strings.index(IGN.URI), OS);
+  llvm::StringRef Hash(reinterpret_cast<const char *>(IGN.Digest.data()),
+                       IGN.Digest.size());
+  OS << Hash;
+  writeVar(IGN.DirectIncludes.size(), OS);
+  for (llvm::StringRef Include : IGN.DirectIncludes)
+    writeVar(Strings.index(Include), OS);
+}
+
 void writeSymbol(const Symbol &Sym, const StringTableOut &Strings,
                  raw_ostream &OS) {
   OS << Sym.ID.raw(); // TODO: once we start writing xrefs and posting lists,
@@ -333,7 +358,7 @@ std::pair<SymbolID, std::vector<Ref>> re
 // A file is a RIFF chunk with type 'CdIx'.
 // It contains the sections:
 //   - meta: version number
-//   - srcs: checksum of the source file
+//   - srcs: information related to include graph
 //   - stri: string table
 //   - symb: symbols
 //   - refs: references to symbols
@@ -367,10 +392,20 @@ Expected<IndexFileIn> readRIFF(StringRef
 
   IndexFileIn Result;
   if (Chunks.count("srcs")) {
-    Reader Hash(Chunks.lookup("srcs"));
-    Result.Digest.emplace();
-    llvm::StringRef Digest = Hash.consume(Result.Digest->size());
-    std::copy(Digest.bytes_begin(), Digest.bytes_end(), Result.Digest->begin());
+    Reader SrcsReader(Chunks.lookup("srcs"));
+    Result.Sources.emplace();
+    while (!SrcsReader.eof()) {
+      auto IGN = readIncludeGraphNode(SrcsReader, Strings->Strings);
+      auto Entry = Result.Sources->try_emplace(IGN.URI).first;
+      Entry->getValue() = std::move(IGN);
+      // We change all the strings inside the structure to point at the keys in
+      // the map, since it is the only copy of the string that's going to live.
+      Entry->getValue().URI = Entry->getKey();
+      for (auto &Include : Entry->getValue().DirectIncludes)
+        Include = Result.Sources->try_emplace(Include).first->getKey();
+    }
+    if (SrcsReader.err())
+      return makeError("malformed or truncated include uri");
   }
 
   if (Chunks.count("symb")) {
@@ -397,6 +432,13 @@ Expected<IndexFileIn> readRIFF(StringRef
   return std::move(Result);
 }
 
+template <class Callback>
+void visitStrings(IncludeGraphNode &IGN, const Callback &CB) {
+  CB(IGN.URI);
+  for (llvm::StringRef &Include : IGN.DirectIncludes)
+    CB(Include);
+}
+
 void writeRIFF(const IndexFileOut &Data, raw_ostream &OS) {
   assert(Data.Symbols && "An index file without symbols makes no sense!");
   riff::File RIFF;
@@ -409,18 +451,19 @@ void writeRIFF(const IndexFileOut &Data,
   }
   RIFF.Chunks.push_back({riff::fourCC("meta"), Meta});
 
-  if (Data.Digest) {
-    llvm::StringRef Hash(reinterpret_cast<const char *>(Data.Digest->data()),
-                         Data.Digest->size());
-    RIFF.Chunks.push_back({riff::fourCC("srcs"), Hash});
-  }
-
   StringTableOut Strings;
   std::vector<Symbol> Symbols;
   for (const auto &Sym : *Data.Symbols) {
     Symbols.emplace_back(Sym);
     visitStrings(Symbols.back(), [&](StringRef &S) { Strings.intern(S); });
   }
+  std::vector<IncludeGraphNode> Sources;
+  if (Data.Sources)
+    for (const auto &Source : *Data.Sources) {
+      Sources.push_back(Source.getValue());
+      visitStrings(Sources.back(), [&](StringRef &S) { Strings.intern(S); });
+    }
+
   std::vector<std::pair<SymbolID, std::vector<Ref>>> Refs;
   if (Data.Refs) {
     for (const auto &Sym : *Data.Refs) {
@@ -458,6 +501,16 @@ void writeRIFF(const IndexFileOut &Data,
     RIFF.Chunks.push_back({riff::fourCC("refs"), RefsSection});
   }
 
+  std::string SrcsSection;
+  {
+    {
+      raw_string_ostream SrcsOS(SrcsSection);
+      for (const auto &SF : Sources)
+        writeIncludeGraphNode(SF, Strings, SrcsOS);
+    }
+    RIFF.Chunks.push_back({riff::fourCC("srcs"), SrcsSection});
+  }
+
   OS << RIFF;
 }
 

Modified: clang-tools-extra/trunk/clangd/index/Serialization.h
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/index/Serialization.h?rev=347669&r1=347668&r2=347669&view=diff
==============================================================================
--- clang-tools-extra/trunk/clangd/index/Serialization.h (original)
+++ clang-tools-extra/trunk/clangd/index/Serialization.h Tue Nov 27 08:08:53 2018
@@ -24,6 +24,7 @@
 
 #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_RIFF_H
 #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_RIFF_H
+#include "Headers.h"
 #include "Index.h"
 #include "llvm/Support/Error.h"
 
@@ -37,11 +38,10 @@ enum class IndexFileFormat {
 
 // Holds the contents of an index file that was read.
 struct IndexFileIn {
-  using FileDigest = std::array<uint8_t, 20>;
   llvm::Optional<SymbolSlab> Symbols;
   llvm::Optional<RefSlab> Refs;
-  // Digest of the source file that generated the contents.
-  llvm::Optional<FileDigest> Digest;
+  // Keys are URIs of the source files.
+  llvm::Optional<IncludeGraph> Sources;
 };
 // Parse an index file. The input must be a RIFF or YAML file.
 llvm::Expected<IndexFileIn> readIndexFile(llvm::StringRef);
@@ -50,8 +50,8 @@ llvm::Expected<IndexFileIn> readIndexFil
 struct IndexFileOut {
   const SymbolSlab *Symbols = nullptr;
   const RefSlab *Refs = nullptr;
-  // Digest of the source file that generated the contents.
-  const IndexFileIn::FileDigest *Digest = nullptr;
+  // Keys are URIs of the source files.
+  const IncludeGraph *Sources = nullptr;
   // TODO: Support serializing Dex posting lists.
   IndexFileFormat Format = IndexFileFormat::RIFF;
 

Modified: clang-tools-extra/trunk/unittests/clangd/BackgroundIndexTests.cpp
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/unittests/clangd/BackgroundIndexTests.cpp?rev=347669&r1=347668&r2=347669&view=diff
==============================================================================
--- clang-tools-extra/trunk/unittests/clangd/BackgroundIndexTests.cpp (original)
+++ clang-tools-extra/trunk/unittests/clangd/BackgroundIndexTests.cpp Tue Nov 27 08:08:53 2018
@@ -129,8 +129,6 @@ TEST_F(BackgroundIndexTest, ShardStorage
       )cpp";
   std::string A_CC = "#include \"A.h\"\nvoid g() { (void)common; }";
   FS.Files[testPath("root/A.cc")] = A_CC;
-  auto Digest = llvm::SHA1::hash(
-      {reinterpret_cast<const uint8_t *>(A_CC.data()), A_CC.size()});
 
   llvm::StringMap<std::string> Storage;
   size_t CacheHits = 0;
@@ -165,7 +163,6 @@ TEST_F(BackgroundIndexTest, ShardStorage
   EXPECT_NE(ShardSource, nullptr);
   EXPECT_THAT(*ShardSource->Symbols, UnorderedElementsAre());
   EXPECT_THAT(*ShardSource->Refs, RefsAre({FileURI("unittest:///root/A.cc")}));
-  EXPECT_EQ(*ShardSource->Digest, Digest);
 }
 
 } // namespace clangd

Modified: clang-tools-extra/trunk/unittests/clangd/SerializationTests.cpp
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/unittests/clangd/SerializationTests.cpp?rev=347669&r1=347668&r2=347669&view=diff
==============================================================================
--- clang-tools-extra/trunk/unittests/clangd/SerializationTests.cpp (original)
+++ clang-tools-extra/trunk/unittests/clangd/SerializationTests.cpp Tue Nov 27 08:08:53 2018
@@ -173,31 +173,44 @@ TEST(SerializationTest, BinaryConversion
               UnorderedElementsAreArray(YAMLFromRefs(*In->Refs)));
 }
 
-TEST(SerializationTest, HashTest) {
+TEST(SerializationTest, SrcsTest) {
   auto In = readIndexFile(YAML);
   EXPECT_TRUE(bool(In)) << In.takeError();
 
-  std::string TestContent("TESTCONTENT");
-  auto Digest =
+  std::string TestContent("TestContent");
+  IncludeGraphNode IGN;
+  IGN.Digest =
       llvm::SHA1::hash({reinterpret_cast<const uint8_t *>(TestContent.data()),
                         TestContent.size()});
+  IGN.DirectIncludes = {"inc1", "inc2"};
+  IGN.URI = "URI";
+  IGN.IsTU = true;
+  IncludeGraph Sources;
+  Sources[IGN.URI] = IGN;
   // Write to binary format, and parse again.
   IndexFileOut Out(*In);
   Out.Format = IndexFileFormat::RIFF;
-  Out.Digest = &Digest;
-  std::string Serialized = to_string(Out);
+  Out.Sources = &Sources;
+  {
+    std::string Serialized = to_string(Out);
 
-  auto In2 = readIndexFile(Serialized);
-  ASSERT_TRUE(bool(In2)) << In.takeError();
-  ASSERT_EQ(In2->Digest, Digest);
-  ASSERT_TRUE(In2->Symbols);
-  ASSERT_TRUE(In2->Refs);
-
-  // Assert the YAML serializations match, for nice comparisons and diffs.
-  EXPECT_THAT(YAMLFromSymbols(*In2->Symbols),
-              UnorderedElementsAreArray(YAMLFromSymbols(*In->Symbols)));
-  EXPECT_THAT(YAMLFromRefs(*In2->Refs),
-              UnorderedElementsAreArray(YAMLFromRefs(*In->Refs)));
+    auto In = readIndexFile(Serialized);
+    ASSERT_TRUE(bool(In)) << In.takeError();
+    ASSERT_TRUE(In->Symbols);
+    ASSERT_TRUE(In->Refs);
+    ASSERT_TRUE(In->Sources);
+    ASSERT_TRUE(In->Sources->count(IGN.URI));
+    // Assert the YAML serializations match, for nice comparisons and diffs.
+    EXPECT_THAT(YAMLFromSymbols(*In->Symbols),
+                UnorderedElementsAreArray(YAMLFromSymbols(*In->Symbols)));
+    EXPECT_THAT(YAMLFromRefs(*In->Refs),
+                UnorderedElementsAreArray(YAMLFromRefs(*In->Refs)));
+    auto IGNDeserialized = In->Sources->lookup(IGN.URI);
+    EXPECT_EQ(IGNDeserialized.Digest, IGN.Digest);
+    EXPECT_EQ(IGNDeserialized.DirectIncludes, IGN.DirectIncludes);
+    EXPECT_EQ(IGNDeserialized.URI, IGN.URI);
+    EXPECT_EQ(IGNDeserialized.IsTU, IGN.IsTU);
+  }
 }
 
 } // namespace




More information about the cfe-commits mailing list