[Lldb-commits] [lldb] [lldb] Refactor TypeQuery::ContextMatches (PR #99305)
via lldb-commits
lldb-commits at lists.llvm.org
Wed Jul 17 03:49:41 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-lldb
Author: Pavel Labath (labath)
<details>
<summary>Changes</summary>
This is a preparatory step for teaching the function about anonymous namespaces. It started out as a way to remove the assumption that the pattern and target contexts must be of the same length -- that's will not be correct with anonymous namespaces, and probably isn't even correct right now for AnyModule matches.
To make that work, I needed to reverse the direction of the matching (since partial matches can ignore outermost contexts). This in turn made me realise that reversing the greedy AnyModule matching algorithm would not preserve current behavior, and AnyModule would greedily consume everything, and leave nothing for the exact module name matches that are before it (the same thing could happen in the original algorithm with modules that come after an AnyModule pattern, but I guess we just don't make queries like that).
That's why the bulk of this patch is concerned with implementing a more correct (maybe? I'm assuming the intention was to provide glob-like matching) matching for AnyModule patterns. The algorithm is technically quadratic, but that's probably fine as I don't think we have deeply nested modules or many wildcard matches.
---
Full diff: https://github.com/llvm/llvm-project/pull/99305.diff
3 Files Affected:
- (modified) lldb/include/lldb/Symbol/Type.h (-5)
- (modified) lldb/source/Symbol/Type.cpp (+86-42)
- (modified) lldb/unittests/Symbol/TestType.cpp (+83-35)
``````````diff
diff --git a/lldb/include/lldb/Symbol/Type.h b/lldb/include/lldb/Symbol/Type.h
index c6f30cde81867..365f600343e1d 100644
--- a/lldb/include/lldb/Symbol/Type.h
+++ b/lldb/include/lldb/Symbol/Type.h
@@ -65,11 +65,6 @@ struct CompilerContext {
llvm::raw_ostream &operator<<(llvm::raw_ostream &os,
const CompilerContext &rhs);
-/// Match \p context_chain against \p pattern, which may contain "Any"
-/// kinds. The \p context_chain should *not* contain any "Any" kinds.
-bool contextMatches(llvm::ArrayRef<CompilerContext> context_chain,
- llvm::ArrayRef<CompilerContext> pattern);
-
FLAGS_ENUM(TypeQueryOptions){
e_none = 0u,
/// If set, TypeQuery::m_context contains an exact context that must match
diff --git a/lldb/source/Symbol/Type.cpp b/lldb/source/Symbol/Type.cpp
index e76574795733f..67ed4a2e41f49 100644
--- a/lldb/source/Symbol/Type.cpp
+++ b/lldb/source/Symbol/Type.cpp
@@ -6,7 +6,9 @@
//
//===----------------------------------------------------------------------===//
+#include <algorithm>
#include <cstdio>
+#include <iterator>
#include <optional>
#include "lldb/Core/Module.h"
@@ -30,6 +32,7 @@
#include "lldb/Target/Process.h"
#include "lldb/Target/Target.h"
#include "lldb/lldb-enumerations.h"
+#include "lldb/lldb-private-enumerations.h"
#include "llvm/ADT/StringRef.h"
@@ -43,35 +46,6 @@ llvm::raw_ostream &lldb_private::operator<<(llvm::raw_ostream &os,
return os << lldb_stream.GetString();
}
-bool lldb_private::contextMatches(llvm::ArrayRef<CompilerContext> context_chain,
- llvm::ArrayRef<CompilerContext> pattern) {
- auto ctx = context_chain.begin();
- auto ctx_end = context_chain.end();
- for (const CompilerContext &pat : pattern) {
- // Early exit if the pattern is too long.
- if (ctx == ctx_end)
- return false;
- if (*ctx != pat) {
- // Skip any number of module matches.
- if (pat.kind == CompilerContextKind::AnyModule) {
- // Greedily match 0..n modules.
- ctx = std::find_if(ctx, ctx_end, [](const CompilerContext &ctx) {
- return ctx.kind != CompilerContextKind::Module;
- });
- continue;
- }
- // See if there is a kind mismatch; they should have 1 bit in common.
- if (((uint16_t)ctx->kind & (uint16_t)pat.kind) == 0)
- return false;
- // The name is ignored for AnyModule, but not for AnyType.
- if (pat.kind != CompilerContextKind::AnyModule && ctx->name != pat.name)
- return false;
- }
- ++ctx;
- }
- return true;
-}
-
static CompilerContextKind ConvertTypeClass(lldb::TypeClass type_class) {
if (type_class == eTypeClassAny)
return CompilerContextKind::AnyType;
@@ -153,19 +127,89 @@ void TypeQuery::SetLanguages(LanguageSet languages) {
bool TypeQuery::ContextMatches(
llvm::ArrayRef<CompilerContext> context_chain) const {
- if (GetExactMatch() || context_chain.size() == m_context.size())
- return ::contextMatches(context_chain, m_context);
-
- // We don't have an exact match, we need to bottom m_context.size() items to
- // match for a successful lookup.
- if (context_chain.size() < m_context.size())
- return false; // Not enough items in context_chain to allow for a match.
-
- size_t compare_count = context_chain.size() - m_context.size();
- return ::contextMatches(
- llvm::ArrayRef<CompilerContext>(context_chain.data() + compare_count,
- m_context.size()),
- m_context);
+ auto ctx = context_chain.rbegin(), ctx_end = context_chain.rend();
+ for (auto pat = m_context.rbegin(), pat_end = m_context.rend();
+ pat != pat_end;) {
+
+ // Handle AnyModule matches. These are tricky as they can match any number
+ // of modules.
+ if (pat->kind == CompilerContextKind::AnyModule) {
+ // Successive wildcards are equivalent to a single wildcard.
+ while (pat->kind == CompilerContextKind::AnyModule)
+ ++pat;
+
+ // [ctx, ctx_module_end) is the range of entries that may be matched by
+ // our wildcard.
+ auto ctx_module_end =
+ std::find_if(ctx, ctx_end, [](const CompilerContext &c) {
+ return c.kind != CompilerContextKind::Module;
+ });
+
+ // [pat, exact_pat_end) is the range of exact module match patterns. If
+ // it's not empty, we need to make sure our wildcard does not consume
+ // entries matched by those.
+ auto exact_pat_end =
+ std::find_if(pat, pat_end, [](const CompilerContext &p) {
+ return (p.kind & CompilerContextKind::AnyModule) !=
+ CompilerContextKind::Module;
+ });
+
+ if (pat == exact_pat_end) {
+ // No exact matches, just consume everything.
+ ctx = ctx_module_end;
+ continue;
+ }
+
+ // We have a non-empty module sequence after the wildcard. Now we need to
+ // look at what comes *after* that. If that's another wildcard, we want
+ // *this* wildcard to match as little as possible to give the other
+ // wildcard (and whatever comes after it) as much freedom as possible. If
+ // it's *not* another wildcard (we've reached the end of the pattern, or
+ // we have some non-module patterns after this), we want to match as much
+ // as possible, as there will be nothing left to match any remaining
+ // module entries.
+ bool greedy_match = exact_pat_end == pat_end ||
+ exact_pat_end->kind != CompilerContextKind::AnyModule;
+
+ auto pred = [](const CompilerContext &c, const CompilerContext &p) {
+ return c.name == p.name;
+ };
+ auto pos =
+ greedy_match
+ ? std::find_end(ctx, ctx_module_end, pat, exact_pat_end, pred)
+ : std::search(ctx, ctx_module_end, pat, exact_pat_end, pred);
+
+ if (pos == ctx_module_end)
+ return false; // Matching failed.
+
+ // We've successfully matched the wildcard and the exact-module sequence
+ // that comes after it.
+ ctx = std::next(pos, std::distance(pat, exact_pat_end));
+ pat = exact_pat_end;
+ continue;
+ }
+
+ if (ctx == ctx_end)
+ return false; // Pattern too long.
+
+ // See if there is a kind mismatch; they should have 1 bit in common.
+ if ((ctx->kind & pat->kind) == CompilerContextKind())
+ return false;
+
+ if (ctx->name != pat->name)
+ return false;
+
+ ++ctx;
+ ++pat;
+ }
+
+ // At this point, we have exhausted the pattern and we have a partial match at
+ // least. If that's all we're looking for, we're done.
+ if (!GetExactMatch())
+ return true;
+
+ // We have an exact match if we've exhausted the target context as well.
+ return ctx == ctx_end;
}
bool TypeQuery::LanguageMatches(lldb::LanguageType language) const {
diff --git a/lldb/unittests/Symbol/TestType.cpp b/lldb/unittests/Symbol/TestType.cpp
index 79201d6ba2e59..2814920bec3f5 100644
--- a/lldb/unittests/Symbol/TestType.cpp
+++ b/lldb/unittests/Symbol/TestType.cpp
@@ -7,13 +7,16 @@
//
//===----------------------------------------------------------------------===//
+#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "lldb/Symbol/Type.h"
#include "lldb/lldb-enumerations.h"
+#include "lldb/lldb-private-enumerations.h"
using namespace lldb;
using namespace lldb_private;
+using testing::Not;
TEST(Type, GetTypeScopeAndBasename) {
EXPECT_EQ(Type::GetTypeScopeAndBasename("int"),
@@ -47,40 +50,85 @@ TEST(Type, GetTypeScopeAndBasename) {
EXPECT_EQ(Type::GetTypeScopeAndBasename("foo<::bar"), std::nullopt);
}
+namespace {
+MATCHER_P(Matches, pattern, "") {
+ TypeQuery query(pattern, TypeQueryOptions::e_none);
+ return query.ContextMatches(arg);
+}
+} // namespace
+
TEST(Type, CompilerContextPattern) {
- std::vector<CompilerContext> mmc = {
- {CompilerContextKind::Module, ConstString("A")},
- {CompilerContextKind::Module, ConstString("B")},
- {CompilerContextKind::ClassOrStruct, ConstString("S")}};
- std::vector<CompilerContext> mc = {
- {CompilerContextKind::Module, ConstString("A")},
- {CompilerContextKind::ClassOrStruct, ConstString("S")}};
- std::vector<CompilerContext> mac = {
- {CompilerContextKind::Module, ConstString("A")},
- {CompilerContextKind::AnyModule, ConstString("*")},
- {CompilerContextKind::ClassOrStruct, ConstString("S")}};
- EXPECT_TRUE(contextMatches(mmc, mac));
- EXPECT_TRUE(contextMatches(mc, mac));
- EXPECT_FALSE(contextMatches(mac, mc));
- std::vector<CompilerContext> mmmc = {
- {CompilerContextKind::Module, ConstString("A")},
- {CompilerContextKind::Module, ConstString("B")},
- {CompilerContextKind::Module, ConstString("C")},
- {CompilerContextKind::ClassOrStruct, ConstString("S")}};
- EXPECT_TRUE(contextMatches(mmmc, mac));
- std::vector<CompilerContext> mme = {
- {CompilerContextKind::Module, ConstString("A")},
- {CompilerContextKind::Module, ConstString("B")},
- {CompilerContextKind::Enum, ConstString("S")}};
- std::vector<CompilerContext> mma = {
- {CompilerContextKind::Module, ConstString("A")},
- {CompilerContextKind::Module, ConstString("B")},
- {CompilerContextKind::AnyType, ConstString("S")}};
- EXPECT_TRUE(contextMatches(mme, mma));
- EXPECT_TRUE(contextMatches(mmc, mma));
- std::vector<CompilerContext> mme2 = {
- {CompilerContextKind::Module, ConstString("A")},
- {CompilerContextKind::Module, ConstString("B")},
- {CompilerContextKind::Enum, ConstString("S2")}};
- EXPECT_FALSE(contextMatches(mme2, mma));
+ const CompilerContext any_module(CompilerContextKind::AnyModule,
+ ConstString("*"));
+ auto make_module = [](llvm::StringRef name) {
+ return CompilerContext(CompilerContextKind::Module, ConstString(name));
+ };
+ auto make_class = [](llvm::StringRef name) {
+ return CompilerContext(CompilerContextKind::ClassOrStruct,
+ ConstString(name));
+ };
+ auto make_any_type = [](llvm::StringRef name) {
+ return CompilerContext(CompilerContextKind::AnyType, ConstString(name));
+ };
+ auto make_enum = [](llvm::StringRef name) {
+ return CompilerContext(CompilerContextKind::Enum, ConstString(name));
+ };
+ auto make_namespace = [](llvm::StringRef name) {
+ return CompilerContext(CompilerContextKind::Namespace, ConstString(name));
+ };
+
+ EXPECT_THAT(
+ (std::vector{make_module("A"), make_module("B"), make_class("C")}),
+ Matches(std::vector{make_module("A"), any_module, make_class("C")}));
+ EXPECT_THAT(
+ (std::vector{make_module("A"), make_class("C")}),
+ Matches(std::vector{make_module("A"), any_module, make_class("C")}));
+ EXPECT_THAT((std::vector{make_module("A"), make_class("C")}),
+ Matches(std::vector{any_module, make_class("C")}));
+ EXPECT_THAT((std::vector{make_module("A"), any_module, make_class("C")}),
+ Not(Matches(std::vector{make_module("A"), make_class("C")})));
+ EXPECT_THAT(
+ (std::vector{make_module("A"), make_module("B"), make_module("C"),
+ make_class("C")}),
+ Matches(std::vector{make_module("A"), any_module, make_class("C")}));
+ EXPECT_THAT(
+ (std::vector{make_module("A"), make_module("B"), make_module("C"),
+ make_module("A"), make_module("B"), make_module("C")}),
+ Matches(std::vector{make_module("A"), any_module, make_module("C")}));
+ EXPECT_THAT(
+ (std::vector{make_module("A"), make_module("B"), make_module("C"),
+ make_module("A"), make_module("B"), make_module("C")}),
+ Matches(std::vector{make_module("A"), any_module, any_module,
+ make_module("C")}));
+ EXPECT_THAT(
+ (std::vector{make_module("A"), make_module("B"), make_module("C"),
+ make_module("A"), make_module("B"), make_module("C")}),
+ Matches(std::vector{any_module, make_module("A"), any_module,
+ make_module("C")}));
+ EXPECT_THAT(
+ (std::vector{make_module("A"), make_module("B"), make_module("C"),
+ make_module("A"), make_module("B"), make_module("C")}),
+ Matches(std::vector{any_module, make_module("A"), any_module,
+ make_module("C")}));
+ EXPECT_THAT(
+ (std::vector{make_module("A"), make_module("B"), make_module("C"),
+ make_module("A"), make_module("B"), make_module("C")}),
+ Not(Matches(std::vector{any_module, make_module("B"), any_module,
+ make_module("A"), any_module, make_module("A"),
+ any_module})));
+ EXPECT_THAT((std::vector{make_module("A"), make_module("B"), make_enum("C")}),
+ Matches(std::vector{make_module("A"), make_module("B"),
+ make_any_type("C")}));
+ EXPECT_THAT(
+ (std::vector{make_module("A"), make_module("B"), make_class("C")}),
+ Matches(
+ std::vector{make_module("A"), make_module("B"), make_any_type("C")}));
+ EXPECT_THAT(
+ (std::vector{make_module("A"), make_module("B"), make_enum("C2")}),
+ Not(Matches(std::vector{make_module("A"), make_module("B"),
+ make_any_type("C")})));
+ EXPECT_THAT((std::vector{make_class("C")}),
+ Matches(std::vector{make_class("C")}));
+ EXPECT_THAT((std::vector{make_namespace("NS"), make_class("C")}),
+ Not(Matches(std::vector{make_any_type("C")})));
}
``````````
</details>
https://github.com/llvm/llvm-project/pull/99305
More information about the lldb-commits
mailing list