[clang-tools-extra] [clangd] Improve filtering logic for undesired proto symbols (PR #110091)
kadir çetinkaya via cfe-commits
cfe-commits at lists.llvm.org
Mon Sep 30 02:25:21 PDT 2024
https://github.com/kadircet updated https://github.com/llvm/llvm-project/pull/110091
>From 02e1677ed40ef23792943d9b2cb895ba80312ee9 Mon Sep 17 00:00:00 2001
From: Kadir Cetinkaya <kadircet at google.com>
Date: Thu, 26 Sep 2024 10:55:49 +0200
Subject: [PATCH] [clangd] Improve filtering logic for undesired proto symbols
This used to filter any names with `_` in them, apart from
enum-constants. Resulting in discrepancies in behavior when we had
fields that have `_` in the name, or for accessors like `set_`, `has_`.
The logic seems to be trying to filter mangled names for nested entries,
so adjusted logic to only do so for top-level decls, while still
preserving some public top-level helpers.
Heuristics are still leaning towards false-negatives, e.g. if a
top-level entity has `_` in its name (`message Foo_Bar {}`), it'll be
filtered, or an enum that prefixes its type name to constants
(`enum Foo { Foo_OK }`).
---
.../clangd/index/SymbolCollector.cpp | 69 +++++++++++++++----
.../clangd/unittests/SymbolCollectorTests.cpp | 64 ++++++++++++++---
2 files changed, 111 insertions(+), 22 deletions(-)
diff --git a/clang-tools-extra/clangd/index/SymbolCollector.cpp b/clang-tools-extra/clangd/index/SymbolCollector.cpp
index a76894cf0855f3..d1d744a21cfd55 100644
--- a/clang-tools-extra/clangd/index/SymbolCollector.cpp
+++ b/clang-tools-extra/clangd/index/SymbolCollector.cpp
@@ -41,6 +41,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Path.h"
@@ -75,18 +76,62 @@ bool isPrivateProtoDecl(const NamedDecl &ND) {
if (ND.getIdentifier() == nullptr)
return false;
auto Name = ND.getIdentifier()->getName();
- if (!Name.contains('_'))
- return false;
- // Nested proto entities (e.g. Message::Nested) have top-level decls
- // that shouldn't be used (Message_Nested). Ignore them completely.
- // The nested entities are dangling type aliases, we may want to reconsider
- // including them in the future.
- // For enum constants, SOME_ENUM_CONSTANT is not private and should be
- // indexed. Outer_INNER is private. This heuristic relies on naming style, it
- // will include OUTER_INNER and exclude some_enum_constant.
- // FIXME: the heuristic relies on naming style (i.e. no underscore in
- // user-defined names) and can be improved.
- return (ND.getKind() != Decl::EnumConstant) || llvm::any_of(Name, islower);
+ // There are some internal helpers like _internal_set_foo();
+ if (Name.contains("_internal_"))
+ return true;
+
+ // https://protobuf.dev/reference/cpp/cpp-generated/#nested-types
+ // Nested entities (messages/enums) has two names, one at the top-level scope,
+ // with a mangled name created by prepending all the outer types. These names
+ // are almost never preferred by the developers, so exclude them from index.
+ // e.g.
+ // message Foo {
+ // message Bar {}
+ // enum E { A }
+ // }
+ //
+ // yields:
+ // class Foo_Bar {};
+ // enum Foo_E { Foo_E_A };
+ // class Foo {
+ // using Bar = Foo_Bar;
+ // static constexpr Foo_E A = Foo_E_A;
+ // };
+
+ // We get rid of Foo_Bar and Foo_E by discarding any top-level entries with
+ // `_` in the name. This relies on original message/enum not having `_` in the
+ // name. Hence might go wrong in certain cases.
+ if (ND.getDeclContext()->isNamespace()) {
+ // Strip off some known public suffix helpers for enums, rest of the helpers
+ // are generated inside record decls so we don't care.
+ // https://protobuf.dev/reference/cpp/cpp-generated/#enum
+ Name.consume_back("_descriptor");
+ Name.consume_back("_IsValid");
+ Name.consume_back("_Name");
+ Name.consume_back("_Parse");
+ Name.consume_back("_MIN");
+ Name.consume_back("_MAX");
+ Name.consume_back("_ARRAYSIZE");
+ return Name.contains('_');
+ }
+
+ // EnumConstantDecls need some special attention, despite being nested in a
+ // TagDecl, they might still have mangled names. We filter those by checking
+ // if it has parent's name as a prefix.
+ // This might go wrong if a nested entity has a name that starts with parent's
+ // name, e.g: enum Foo { Foo_X }.
+ if (llvm::isa<EnumConstantDecl>(&ND)) {
+ auto *DC = llvm::cast<EnumDecl>(ND.getDeclContext());
+ if (!DC || !DC->getIdentifier())
+ return false;
+ auto CtxName = DC->getIdentifier()->getName();
+ return !CtxName.empty() && Name.consume_front(CtxName) &&
+ Name.consume_front("_");
+ }
+
+ // Now we're only left with fields/methods without an `_internal_` in the
+ // name, they're intended for public use.
+ return false;
}
// We only collect #include paths for symbols that are suitable for global code
diff --git a/clang-tools-extra/clangd/unittests/SymbolCollectorTests.cpp b/clang-tools-extra/clangd/unittests/SymbolCollectorTests.cpp
index 0666be95b6b9ee..e8088cb37fa51c 100644
--- a/clang-tools-extra/clangd/unittests/SymbolCollectorTests.cpp
+++ b/clang-tools-extra/clangd/unittests/SymbolCollectorTests.cpp
@@ -201,19 +201,63 @@ TEST_F(ShouldCollectSymbolTest, NoPrivateProtoSymbol) {
build(
R"(// Generated by the protocol buffer compiler. DO NOT EDIT!
namespace nx {
- class Top_Level {};
- class TopLevel {};
- enum Kind {
- KIND_OK,
- Kind_Not_Ok,
+ enum Outer_Enum : int {
+ Outer_Enum_KIND1,
+ Outer_Enum_Kind_2,
};
+ bool Outer_Enum_IsValid(int);
+
+ class Outer_Inner {};
+ class Outer {
+ using Inner = Outer_Inner;
+ using Enum = Outer_Enum;
+ static constexpr Enum KIND1 = Outer_Enum_KIND1;
+ static constexpr Enum Kind_2 = Outer_Enum_Kind_2;
+ static bool Enum_IsValid(int);
+ int &x();
+ void set_x();
+ void _internal_set_x();
+
+ int &Outer_y();
+ };
+ enum Foo {
+ FOO_VAL1,
+ Foo_VAL2,
+ };
+ bool Foo_IsValid(int);
})");
- EXPECT_TRUE(shouldCollect("nx::TopLevel"));
- EXPECT_TRUE(shouldCollect("nx::Kind::KIND_OK"));
- EXPECT_TRUE(shouldCollect("nx::Kind"));
- EXPECT_FALSE(shouldCollect("nx::Top_Level"));
- EXPECT_FALSE(shouldCollect("nx::Kind::Kind_Not_Ok"));
+ // Make sure all the mangled names for Outer::Enum is discarded.
+ EXPECT_FALSE(shouldCollect("nx::Outer_Enum"));
+ EXPECT_FALSE(shouldCollect("nx::Outer_Enum_KIND1"));
+ EXPECT_FALSE(shouldCollect("nx::Outer_Enum_Kind_2"));
+ EXPECT_FALSE(shouldCollect("nx::Outer_Enum_IsValid"));
+ // But nested aliases are preserved.
+ EXPECT_TRUE(shouldCollect("nx::Outer::Enum"));
+ EXPECT_TRUE(shouldCollect("nx::Outer::KIND1"));
+ EXPECT_TRUE(shouldCollect("nx::Outer::Kind_2"));
+ EXPECT_TRUE(shouldCollect("nx::Outer::Enum_IsValid"));
+
+ // Check for Outer::Inner.
+ EXPECT_FALSE(shouldCollect("nx::Outer_Inner"));
+ EXPECT_TRUE(shouldCollect("nx::Outer"));
+ EXPECT_TRUE(shouldCollect("nx::Outer::Inner"));
+
+ // Make sure field related information is preserved, unless it's explicitly
+ // marked with `_internal_`.
+ EXPECT_TRUE(shouldCollect("nx::Outer::x"));
+ EXPECT_TRUE(shouldCollect("nx::Outer::set_x"));
+ EXPECT_FALSE(shouldCollect("nx::Outer::_internal_set_x"));
+ EXPECT_TRUE(shouldCollect("nx::Outer::Outer_y"));
+
+ // Handling of a top-level enum
+ EXPECT_TRUE(shouldCollect("nx::Foo::FOO_VAL1"));
+ EXPECT_TRUE(shouldCollect("nx::FOO_VAL1"));
+ EXPECT_TRUE(shouldCollect("nx::Foo_IsValid"));
+ // Our heuristic goes wrong here, if the user has a nested name that starts
+ // with parent's name.
+ EXPECT_FALSE(shouldCollect("nx::Foo::Foo_VAL2"));
+ EXPECT_FALSE(shouldCollect("nx::Foo_VAL2"));
}
TEST_F(ShouldCollectSymbolTest, DoubleCheckProtoHeaderComment) {
More information about the cfe-commits
mailing list