[clang-tools-extra] r320486 - [clangd] Introduce a "Symbol" class.

Haojian Wu via cfe-commits cfe-commits at lists.llvm.org
Tue Dec 12 07:42:10 PST 2017


Author: hokein
Date: Tue Dec 12 07:42:10 2017
New Revision: 320486

URL: http://llvm.org/viewvc/llvm-project?rev=320486&view=rev
Log:
[clangd] Introduce a "Symbol" class.

Summary:
* The "Symbol" class represents a C++ symbol in the codebase, containing all the
  information of a C++ symbol needed by clangd. clangd will use it in clangd's
  AST/dynamic index and global/static index (code completion and code
  navigation).
* The SymbolCollector (another IndexAction) will be used to recollect the
  symbols when the source file is changed (for ASTIndex), or to generate
  all C++ symbols for the whole project.

In the long term (when index-while-building is ready), clangd should share the
same "Symbol" structure and IndexAction with index-while-building, but
for now we want to have some stuff working in clangd.

Reviewers: ioeric, sammccall, ilya-biryukov, malaperle

Reviewed By: sammccall

Subscribers: malaperle, klimek, mgorny, cfe-commits

Differential Revision: https://reviews.llvm.org/D40897

Added:
    clang-tools-extra/trunk/clangd/index/
    clang-tools-extra/trunk/clangd/index/Index.cpp
    clang-tools-extra/trunk/clangd/index/Index.h
    clang-tools-extra/trunk/clangd/index/SymbolCollector.cpp
    clang-tools-extra/trunk/clangd/index/SymbolCollector.h
    clang-tools-extra/trunk/unittests/clangd/SymbolCollectorTests.cpp
Modified:
    clang-tools-extra/trunk/clangd/CMakeLists.txt
    clang-tools-extra/trunk/unittests/clangd/CMakeLists.txt

Modified: clang-tools-extra/trunk/clangd/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/CMakeLists.txt?rev=320486&r1=320485&r2=320486&view=diff
==============================================================================
--- clang-tools-extra/trunk/clangd/CMakeLists.txt (original)
+++ clang-tools-extra/trunk/clangd/CMakeLists.txt Tue Dec 12 07:42:10 2017
@@ -19,6 +19,8 @@ add_clang_library(clangDaemon
   Protocol.cpp
   ProtocolHandlers.cpp
   Trace.cpp
+  index/Index.cpp
+  index/SymbolCollector.cpp
 
   LINK_LIBS
   clangAST

Added: clang-tools-extra/trunk/clangd/index/Index.cpp
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/index/Index.cpp?rev=320486&view=auto
==============================================================================
--- clang-tools-extra/trunk/clangd/index/Index.cpp (added)
+++ clang-tools-extra/trunk/clangd/index/Index.cpp Tue Dec 12 07:42:10 2017
@@ -0,0 +1,49 @@
+//===--- Index.cpp -----------------------------------------------*- C++-*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Index.h"
+
+#include "llvm/Support/SHA1.h"
+
+namespace clang {
+namespace clangd {
+
+namespace {
+// Views the characters of S as raw bytes; no data is copied.
+ArrayRef<uint8_t> toArrayRef(StringRef S) {
+  const auto *Bytes = reinterpret_cast<const uint8_t *>(S.data());
+  return ArrayRef<uint8_t>(Bytes, S.size());
+}
+} // namespace
+
+// Builds the ID by taking the SHA1 hash of the bytes of USR.
+SymbolID::SymbolID(llvm::StringRef USR)
+    : HashValue(llvm::SHA1::hash(toArrayRef(USR))) {}
+
+// Returns an iterator to the first entry of the underlying symbol map.
+SymbolSlab::const_iterator SymbolSlab::begin() const {
+  return Symbols.begin();
+}
+
+// Returns the past-the-end iterator of the underlying symbol map.
+SymbolSlab::const_iterator SymbolSlab::end() const {
+  return Symbols.end();
+}
+
+// Looks up the entry keyed by SymID in the underlying symbol map. Callers
+// compare the result against end() to detect a missing symbol.
+SymbolSlab::const_iterator SymbolSlab::find(const SymbolID& SymID) const {
+  return Symbols.find(SymID);
+}
+
+// Marks the slab as frozen. There is no way to reset the flag; insert()
+// asserts that the slab has not been frozen.
+void SymbolSlab::freeze() {
+  Frozen = true;
+}
+
+// Stores S under its own ID, overwriting any symbol already stored under that
+// ID. Must not be called once the slab has been frozen.
+void SymbolSlab::insert(Symbol S) {
+  assert(!Frozen &&
+         "Can't insert a symbol after the slab has been frozen!");
+  // Look up (or create) the slot first, then move the symbol into it.
+  Symbol &Slot = Symbols[S.ID];
+  Slot = std::move(S);
+}
+
+} // namespace clangd
+} // namespace clang

Added: clang-tools-extra/trunk/clangd/index/Index.h
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/index/Index.h?rev=320486&view=auto
==============================================================================
--- clang-tools-extra/trunk/clangd/index/Index.h (added)
+++ clang-tools-extra/trunk/clangd/index/Index.h Tue Dec 12 07:42:10 2017
@@ -0,0 +1,136 @@
+//===--- Index.h ------------------------------------------------*- C++-*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===---------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_INDEX_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_INDEX_H
+
+#include "clang/Index/IndexSymbol.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/StringExtras.h"
+
+#include <array>
+#include <string>
+
+namespace clang {
+namespace clangd {
+
+// Describes where a symbol occurs: a file plus a pair of offsets into it.
+//
+// This is a plain aggregate; the members have no default initializers, so a
+// default-initialized SymbolLocation has unspecified offsets.
+struct SymbolLocation {
+  // The absolute path of the source file where a symbol occurs.
+  std::string FilePath;
+  // The 0-based offset to the first character of the symbol from the beginning
+  // of the source file.
+  unsigned StartOffset;
+  // The 0-based offset to the last character of the symbol from the beginning
+  // of the source file.
+  unsigned EndOffset;
+};
+
+// The class identifies a particular C++ symbol (class, function, method, etc).
+//
+// As USRs (Unified Symbol Resolution) could be large, especially for functions
+// with long type arguments, SymbolID stores the 160-bit SHA1(USR) value
+// instead of the USR itself. This identifies symbols using a small, fixed
+// amount of memory (vs storing USRs directly).
+//
+// SymbolID can be used as key in the symbol indexes to lookup the symbol.
+class SymbolID {
+public:
+  // Creates an ID whose hash bytes are all zero.
+  SymbolID() = default;
+  // Creates the ID of the symbol with the given USR.
+  SymbolID(llvm::StringRef USR);
+
+  // Two IDs are equal iff their hash values are identical.
+  bool operator==(const SymbolID& Sym) const {
+    return HashValue == Sym.HashValue;
+  }
+
+private:
+  // DenseMapInfo is declared with the "struct" class-key; use the same key
+  // here to avoid mismatched-tag warnings.
+  friend struct llvm::DenseMapInfo<clang::clangd::SymbolID>;
+
+  // Zero-initialized so that a default-constructed ID never holds
+  // indeterminate bytes (operator== reads every byte).
+  std::array<uint8_t, 20> HashValue = {};
+};
+
+// The class represents a C++ symbol, e.g. class, function.
+//
+// FIXME: instead of each symbol owning its own copy of every field, we can
+// share storage from SymbolSlab.
+struct Symbol {
+  // The ID of the symbol.
+  SymbolID ID;
+  // The qualified name of the symbol, e.g. Foo::bar.
+  std::string QualifiedName;
+  // The symbol information, like symbol kind.
+  index::SymbolInfo SymInfo;
+  // The location of the canonical declaration of the symbol.
+  //
+  // A C++ symbol could have multiple declarations and one definition (e.g.
+  // a function is declared in ".h" file, and is defined in ".cc" file).
+  //   * For classes, the canonical declaration is usually the definition.
+  //   * For non-inline functions, the canonical declaration is a declaration
+  //     (not a definition), which is usually declared in ".h" file.
+  SymbolLocation CanonicalDeclaration;
+
+  // FIXME: add definition location of the symbol.
+  // FIXME: add all occurrences support.
+  // FIXME: add extra fields for index scoring signals.
+  // FIXME: add code completion information.
+};
+
+// A symbol container that stores a set of symbols, keyed by SymbolID. The
+// container owns the symbols it stores.
+//
+// FIXME: Use a space-efficient implementation, a lot of Symbol fields could
+// share the same storage.
+class SymbolSlab {
+ public:
+  // Iterating yields the (SymbolID, Symbol) entries of the underlying map.
+  using const_iterator = llvm::DenseMap<SymbolID, Symbol>::const_iterator;
+
+  SymbolSlab() = default;
+
+  const_iterator begin() const;
+  const_iterator end() const;
+  // Looks up the entry for SymID; compare the result against end().
+  const_iterator find(const SymbolID& SymID) const;
+
+  // Once called, no more symbols may be added to the SymbolSlab. This
+  // operation is irreversible.
+  void freeze();
+
+  // Stores S under S.ID. Asserts that the slab has not been frozen.
+  void insert(Symbol S);
+
+ private:
+  // Set by freeze(); checked by insert().
+  bool Frozen = false;
+
+  llvm::DenseMap<SymbolID, Symbol> Symbols;
+};
+
+} // namespace clangd
+} // namespace clang
+
+namespace llvm {
+
+// Lets SymbolID be used as a key type of llvm::DenseMap.
+template <> struct DenseMapInfo<clang::clangd::SymbolID> {
+  // The reserved "empty" key: the ID built from the string "EMPTYKEY".
+  static inline clang::clangd::SymbolID getEmptyKey() {
+    static clang::clangd::SymbolID Key("EMPTYKEY");
+    return Key;
+  }
+  // The reserved "tombstone" key: the ID built from the string
+  // "TOMBSTONEKEY".
+  static inline clang::clangd::SymbolID getTombstoneKey() {
+    static clang::clangd::SymbolID Key("TOMBSTONEKEY");
+    return Key;
+  }
+  // Hashes the raw bytes stored in the ID.
+  static unsigned getHashValue(const clang::clangd::SymbolID &Sym) {
+    ArrayRef<uint8_t> Bytes(Sym.HashValue.data(), Sym.HashValue.size());
+    return hash_value(Bytes);
+  }
+  // Key equality is SymbolID's own operator==.
+  static bool isEqual(const clang::clangd::SymbolID &LHS,
+                      const clang::clangd::SymbolID &RHS) {
+    return LHS == RHS;
+  }
+};
+
+} // namespace llvm
+
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_INDEX_H

Added: clang-tools-extra/trunk/clangd/index/SymbolCollector.cpp
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/index/SymbolCollector.cpp?rev=320486&view=auto
==============================================================================
--- clang-tools-extra/trunk/clangd/index/SymbolCollector.cpp (added)
+++ clang-tools-extra/trunk/clangd/index/SymbolCollector.cpp Tue Dec 12 07:42:10 2017
@@ -0,0 +1,102 @@
+//===--- SymbolCollector.cpp -------------------------------------*- C++-*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SymbolCollector.h"
+
+#include "clang/AST/ASTContext.h"
+#include "clang/AST/Decl.h"
+#include "clang/AST/DeclCXX.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Index/IndexSymbol.h"
+#include "clang/Index/USRGeneration.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Path.h"
+
+namespace clang {
+namespace clangd {
+
+namespace {
+// Makes Path absolute using the current working directory of the given
+// SourceManager's virtual file system if Path is not already absolute, then
+// replaces the parent directory with its canonical name.
+//
+// The Path can be a path relative to the build directory, or retrieved from
+// the SourceManager.
+//
+// If the path cannot be made absolute, a warning is printed to stderr and the
+// function carries on with the path as-is.
+std::string makeAbsolutePath(const SourceManager &SM, StringRef Path) {
+  llvm::SmallString<128> AbsolutePath(Path);
+  if (std::error_code EC =
+          SM.getFileManager().getVirtualFileSystem()->makeAbsolute(
+              AbsolutePath))
+    llvm::errs() << "Warning: could not make absolute file: '" << Path
+                 << "': " << EC.message() << '\n';
+  // Handle the case where the current working directory
+  // (getCurrentWorkingDirectory) is a symlink. We always want the real file
+  // path (instead of the symlink path) for the C++ symbols.
+  //
+  // Consider the following example:
+  //
+  //   src dir: /project/src/foo.h
+  //   current working directory (symlink): /tmp/build -> /project/src/
+  //
+  // The file path of Symbol is "/project/src/foo.h" instead of
+  // "/tmp/build/foo.h"
+  const DirectoryEntry *Dir = SM.getFileManager().getDirectory(
+      llvm::sys::path::parent_path(AbsolutePath.str()));
+  // Without a directory entry there is nothing to canonicalize.
+  if (!Dir)
+    return AbsolutePath.str().str();
+
+  StringRef DirName = SM.getFileManager().getCanonicalName(Dir);
+  llvm::SmallString<128> AbsoluteFilename;
+  llvm::sys::path::append(AbsoluteFilename, DirName,
+                          llvm::sys::path::filename(AbsolutePath.str()));
+  return AbsoluteFilename.str().str();
+}
+} // namespace
+
+// Records a Symbol for each declaration or definition of a named decl with
+// external formal linkage that is not in an anonymous namespace and whose ID
+// has not been collected yet.
+//
+// Always returns true to continue indexing.
+bool SymbolCollector::handleDeclOccurence(
+    const Decl *D, index::SymbolRoleSet Roles,
+    ArrayRef<index::SymbolRelation> Relations, FileID FID, unsigned Offset,
+    index::IndexDataConsumer::ASTNodeInfo ASTNode) {
+  // FIXME: collect all symbol references.
+  const bool IsDeclaration =
+      Roles & static_cast<unsigned>(index::SymbolRole::Declaration);
+  const bool IsDefinition =
+      Roles & static_cast<unsigned>(index::SymbolRole::Definition);
+  if (!IsDeclaration && !IsDefinition)
+    return true;
+
+  const auto *Named = llvm::dyn_cast<NamedDecl>(D);
+  if (!Named)
+    return true;
+
+  // FIXME: Should we include the internal linkage symbols?
+  if (!Named->hasExternalFormalLinkage() || Named->isInAnonymousNamespace())
+    return true;
+
+  // generateUSRForDecl() returns true when no USR could be produced.
+  llvm::SmallVector<char, 128> USRBuffer;
+  if (index::generateUSRForDecl(Named, USRBuffer))
+    return true;
+
+  // Only the first occurrence of a symbol is recorded.
+  SymbolID ID(llvm::StringRef(USRBuffer.data(), USRBuffer.size()));
+  if (Symbols.find(ID) != Symbols.end())
+    return true;
+
+  auto &SM = Named->getASTContext().getSourceManager();
+  std::string FilePath =
+      makeAbsolutePath(SM, SM.getFilename(D->getLocation()));
+  unsigned StartOffset = SM.getFileOffset(D->getLocStart());
+  unsigned EndOffset = SM.getFileOffset(D->getLocEnd());
+  SymbolLocation Location = {std::move(FilePath), StartOffset, EndOffset};
+  Symbols.insert({std::move(ID), Named->getQualifiedNameAsString(),
+                  index::getSymbolInfo(D), std::move(Location)});
+  return true;
+}
+
+// Freezes the collected slab so that no further symbols can be inserted.
+void SymbolCollector::finish() {
+  Symbols.freeze();
+}
+
+} // namespace clangd
+} // namespace clang

Added: clang-tools-extra/trunk/clangd/index/SymbolCollector.h
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/index/SymbolCollector.h?rev=320486&view=auto
==============================================================================
--- clang-tools-extra/trunk/clangd/index/SymbolCollector.h (added)
+++ clang-tools-extra/trunk/clangd/index/SymbolCollector.h Tue Dec 12 07:42:10 2017
@@ -0,0 +1,43 @@
+//===--- SymbolCollector.h ---------------------------------------*- C++-*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_SYMBOLCOLLECTOR_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_SYMBOLCOLLECTOR_H
+
+#include "Index.h"
+
+#include "clang/Index/IndexDataConsumer.h"
+#include "clang/Index/IndexSymbol.h"
+
+namespace clang {
+namespace clangd {
+
+// Collect all symbols from an AST.
+//
+// Clients (e.g. clangd) can use SymbolCollector together with
+// index::indexTopLevelDecls to retrieve all symbols when the source file is
+// changed.
+class SymbolCollector : public index::IndexDataConsumer {
+public:
+  SymbolCollector() = default;
+
+  // Called for every decl occurrence found while indexing; records the symbol
+  // into the slab. Always returns true to continue indexing.
+  bool
+  handleDeclOccurence(const Decl *D, index::SymbolRoleSet Roles,
+                      ArrayRef<index::SymbolRelation> Relations, FileID FID,
+                      unsigned Offset,
+                      index::IndexDataConsumer::ASTNodeInfo ASTNode) override;
+
+  // Freezes the collected slab.
+  void finish() override;
+
+  // Transfers the collected symbols to the caller. This is deliberately
+  // non-const: moving from a const member would silently copy the whole slab.
+  SymbolSlab takeSymbols() { return std::move(Symbols); }
+
+private:
+  // All Symbols collected from the AST.
+  SymbolSlab Symbols;
+};
+
+} // namespace clangd
+} // namespace clang
+
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_SYMBOLCOLLECTOR_H

Modified: clang-tools-extra/trunk/unittests/clangd/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/unittests/clangd/CMakeLists.txt?rev=320486&r1=320485&r2=320486&view=diff
==============================================================================
--- clang-tools-extra/trunk/unittests/clangd/CMakeLists.txt (original)
+++ clang-tools-extra/trunk/unittests/clangd/CMakeLists.txt Tue Dec 12 07:42:10 2017
@@ -16,6 +16,7 @@ add_extra_unittest(ClangdTests
   JSONExprTests.cpp
   TestFS.cpp
   TraceTests.cpp
+  SymbolCollectorTests.cpp
   )
 
 target_link_libraries(ClangdTests

Added: clang-tools-extra/trunk/unittests/clangd/SymbolCollectorTests.cpp
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/unittests/clangd/SymbolCollectorTests.cpp?rev=320486&view=auto
==============================================================================
--- clang-tools-extra/trunk/unittests/clangd/SymbolCollectorTests.cpp (added)
+++ clang-tools-extra/trunk/unittests/clangd/SymbolCollectorTests.cpp Tue Dec 12 07:42:10 2017
@@ -0,0 +1,110 @@
+//===-- SymbolCollectorTests.cpp  -------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "index/SymbolCollector.h"
+#include "clang/Index/IndexingAction.h"
+#include "clang/Basic/FileManager.h"
+#include "clang/Basic/FileSystemOptions.h"
+#include "clang/Basic/VirtualFileSystem.h"
+#include "clang/Frontend/CompilerInstance.h"
+#include "clang/Tooling/Tooling.h"
+#include "llvm/ADT/IntrusiveRefCntPtr.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "gtest/gtest.h"
+#include "gmock/gmock.h"
+
+#include <memory>
+#include <string>
+
+using testing::UnorderedElementsAre;
+using testing::Eq;
+using testing::Field;
+
+// GMock helper for matching Symbols by qualified name. The matched argument is
+// an entry of a SymbolSlab, i.e. a (SymbolID, Symbol) pair, hence ".second".
+MATCHER_P(QName, Name, "") { return arg.second.QualifiedName == Name; }
+
+namespace clang {
+namespace clangd {
+
+namespace {
+// Creates indexing actions that feed a fresh SymbolCollector, which is kept
+// in Collector so the test can read the collected symbols afterwards.
+class SymbolIndexActionFactory : public tooling::FrontendActionFactory {
+ public:
+  SymbolIndexActionFactory() = default;
+
+  // Each call replaces Collector with a new SymbolCollector. The returned
+  // action is released from its unique_ptr; the caller takes ownership.
+  clang::FrontendAction *create() override {
+    index::IndexingOptions Opts;
+    Opts.SystemSymbolFilter =
+        index::IndexingOptions::SystemSymbolFilterKind::All;
+    Opts.IndexFunctionLocals = false;
+    Collector = std::make_shared<SymbolCollector>();
+    auto Action = index::createIndexingAction(Collector, Opts, nullptr);
+    return Action.release();
+  }
+
+  std::shared_ptr<SymbolCollector> Collector;
+};
+
+// Fixture that runs the SymbolCollector over an in-memory header/main-file
+// pair and stores the collected symbols in Symbols.
+class SymbolCollectorTest : public ::testing::Test {
+public:
+  // Indexes MainCode (stored as "symbol.cc"), which is prefixed with an
+  // #include of HeaderCode (stored as "symbols.h"). The collected symbols are
+  // stored in Symbols.
+  //
+  // Returns the result of the tool invocation.
+  bool runSymbolCollector(StringRef HeaderCode, StringRef MainCode) {
+    llvm::IntrusiveRefCntPtr<vfs::InMemoryFileSystem> InMemoryFileSystem(
+        new vfs::InMemoryFileSystem);
+    llvm::IntrusiveRefCntPtr<FileManager> Files(
+        new FileManager(FileSystemOptions(), InMemoryFileSystem));
+
+    const std::string FileName = "symbol.cc";
+    const std::string HeaderName = "symbols.h";
+    auto Factory = llvm::make_unique<SymbolIndexActionFactory>();
+
+    tooling::ToolInvocation Invocation(
+        {"symbol_collector", "-fsyntax-only", "-std=c++11", FileName},
+        Factory->create(), Files.get(),
+        std::make_shared<PCHContainerOperations>());
+
+    InMemoryFileSystem->addFile(HeaderName, 0,
+                                llvm::MemoryBuffer::getMemBuffer(HeaderCode));
+
+    // Content must outlive the invocation: the memory buffer only refers to
+    // it.
+    const std::string Content =
+        "#include\"" + HeaderName + "\"\n" + MainCode.str();
+    InMemoryFileSystem->addFile(FileName, 0,
+                                llvm::MemoryBuffer::getMemBuffer(Content));
+    // Report the real outcome instead of unconditionally claiming success.
+    const bool Succeeded = Invocation.run();
+    Symbols = Factory->Collector->takeSymbols();
+    return Succeeded;
+  }
+
+protected:
+  SymbolSlab Symbols;
+};
+
+// Checks which symbols are collected from a header and a main file: the class,
+// its method and the two functions declared in the header are expected, while
+// the function in the anonymous namespace is not.
+TEST_F(SymbolCollectorTest, CollectSymbol) {
+  const std::string Header = R"(
+    class Foo {
+      void f();
+    };
+    void f1();
+    inline void f2() {}
+  )";
+  const std::string Main = R"(
+    namespace {
+    void ff() {} // ignore
+    }
+    void f1() {}
+  )";
+  runSymbolCollector(Header, Main);
+  // f1 is declared in the header and defined in the main file, yet it is
+  // expected only once.
+  EXPECT_THAT(Symbols, UnorderedElementsAre(QName("Foo"), QName("Foo::f"),
+                                            QName("f1"), QName("f2")));
+}
+
+} // namespace
+} // namespace clangd
+} // namespace clang




More information about the cfe-commits mailing list