[clang-tools-extra] r332456 - [clangd] Filter out private proto symbols in SymbolCollector.

Eric Liu via cfe-commits cfe-commits at lists.llvm.org
Wed May 16 05:12:31 PDT 2018


Author: ioeric
Date: Wed May 16 05:12:30 2018
New Revision: 332456

URL: http://llvm.org/viewvc/llvm-project?rev=332456&view=rev
Log:
[clangd] Filter out private proto symbols in SymbolCollector.

Summary:
This uses heuristics to identify private proto symbols. For example,
top-level symbols whose name contains "_" are considered private. These symbols
are not expected to be used by users.

Reviewers: ilya-biryukov, malaperle

Reviewed By: ilya-biryukov

Subscribers: sammccall, klimek, MaskRay, jkorous, cfe-commits

Differential Revision: https://reviews.llvm.org/D46751

Modified:
    clang-tools-extra/trunk/clangd/index/SymbolCollector.cpp
    clang-tools-extra/trunk/unittests/clangd/SymbolCollectorTests.cpp

Modified: clang-tools-extra/trunk/clangd/index/SymbolCollector.cpp
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/index/SymbolCollector.cpp?rev=332456&r1=332455&r2=332456&view=diff
==============================================================================
--- clang-tools-extra/trunk/clangd/index/SymbolCollector.cpp (original)
+++ clang-tools-extra/trunk/clangd/index/SymbolCollector.cpp Wed May 16 05:12:30 2018
@@ -89,6 +89,46 @@ llvm::Optional<std::string> toURI(const
   return llvm::None;
 }
 
+// All proto generated headers should start with this line.
+static const char *PROTO_HEADER_COMMENT =
+    "// Generated by the protocol buffer compiler.  DO NOT EDIT!";
+
+// Checks whether the decl is a private symbol in a header generated by
+// protobuf compiler.
+// To identify whether a proto header is actually generated by proto compiler,
+// we check whether it starts with PROTO_HEADER_COMMENT.
+// FIXME: make filtering extensible when there are more use cases for symbol
+// filters.
+bool isPrivateProtoDecl(const NamedDecl &ND) {
+  const auto &SM = ND.getASTContext().getSourceManager();
+  auto Loc = findNameLoc(&ND);
+  auto FileName = SM.getFilename(Loc);
+  if (!FileName.endswith(".proto.h") && !FileName.endswith(".pb.h"))
+    return false;
+  auto FID = SM.getFileID(Loc);
+  // Double check that this is an actual protobuf header.
+  if (!SM.getBufferData(FID).startswith(PROTO_HEADER_COMMENT))
+    return false;
+
+  // ND without identifier can be operators.
+  if (ND.getIdentifier() == nullptr)
+    return false;
+  auto Name = ND.getIdentifier()->getName();
+  if (!Name.contains('_'))
+    return false;
+  // Nested proto entities (e.g. Message::Nested) have top-level decls
+  // that shouldn't be used (Message_Nested). Ignore them completely.
+  // The nested entities are dangling type aliases, we may want to reconsider
+  // including them in the future.
+  // For enum constants, SOME_ENUM_CONSTANT is not private and should be
+  // indexed. Outer_INNER is private. This heuristic relies on naming style, it
+  // will include OUTER_INNER and exclude some_enum_constant.
+  // FIXME: the heuristic relies on naming style (i.e. no underscore in
+  // user-defined names) and can be improved.
+  return (ND.getKind() != Decl::EnumConstant) ||
+         std::any_of(Name.begin(), Name.end(), islower);
+}
+
 bool shouldFilterDecl(const NamedDecl *ND, ASTContext *ASTCtx,
                       const SymbolCollector::Options &Opts) {
   using namespace clang::ast_matchers;
@@ -129,6 +169,9 @@ bool shouldFilterDecl(const NamedDecl *N
           .empty())
     return true;
 
+  // Avoid indexing internal symbols in protobuf generated headers.
+  if (isPrivateProtoDecl(*ND))
+    return true;
   return false;
 }
 

Modified: clang-tools-extra/trunk/unittests/clangd/SymbolCollectorTests.cpp
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/unittests/clangd/SymbolCollectorTests.cpp?rev=332456&r1=332455&r2=332456&view=diff
==============================================================================
--- clang-tools-extra/trunk/unittests/clangd/SymbolCollectorTests.cpp (original)
+++ clang-tools-extra/trunk/unittests/clangd/SymbolCollectorTests.cpp Wed May 16 05:12:30 2018
@@ -697,6 +697,41 @@ TEST_F(SymbolCollectorTest, UTF16Charact
                            AllOf(QName("pörk"), DeclRange(Header.range()))));
 }
 
+TEST_F(SymbolCollectorTest, FilterPrivateProtoSymbols) {
+  TestHeaderName = testPath("x.proto.h");
+  const std::string Header =
+      R"(// Generated by the protocol buffer compiler.  DO NOT EDIT!
+         namespace nx {
+           class Top_Level {};
+           class TopLevel {};
+           enum Kind {
+             KIND_OK,
+             Kind_Not_Ok,
+           };
+           bool operator<(const TopLevel &, const TopLevel &);
+         })";
+  runSymbolCollector(Header, /*Main=*/"");
+  EXPECT_THAT(Symbols,
+              UnorderedElementsAre(QName("nx"), QName("nx::TopLevel"),
+                                   QName("nx::Kind"), QName("nx::KIND_OK"),
+                                   QName("nx::operator<")));
+}
+
+TEST_F(SymbolCollectorTest, DoubleCheckProtoHeaderComment) {
+  TestHeaderName = testPath("x.proto.h");
+  const std::string Header = R"(
+  namespace nx {
+    class Top_Level {};
+    enum Kind {
+      Kind_Fine
+    };
+  }
+  )";
+  runSymbolCollector(Header, /*Main=*/"");
+  EXPECT_THAT(Symbols,
+              UnorderedElementsAre(QName("nx"), QName("nx::Top_Level"),
+                                   QName("nx::Kind"), QName("nx::Kind_Fine")));
+}
 
 } // namespace
 } // namespace clangd




More information about the cfe-commits mailing list