[Lldb-commits] [lldb] [lldb] Improve type name parsing (PR #91586)
via lldb-commits
lldb-commits at lists.llvm.org
Thu May 9 05:43:11 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-lldb
Author: Pavel Labath (labath)
<details>
<summary>Changes</summary>
Parsing of '::' scopes in TypeQuery was very naive and failed for names with '::''s in template arguments. Interestingly, one of the functions it was calling (Type::GetTypeScopeAndBasename) was already doing the same thing, and getting it (mostly (*)) right. This refactors the function so that it can return the scope results, fixing the parsing of names like std::vector<int, std::allocator<int>>::iterator.
Two callers of GetTypeScopeAndBasename are deleted as the functions are not used (I presume they stopped being used once we started pruning type search results more eagerly).
(*) This implementation is still not correct when one takes c++ operators into account -- e.g., something like `X<&A::operator<>::T` is a legitimate type name. We do have an implementation that is able to handle names like these (CPlusPlusLanguage::MethodName), but using it is not trivial, because it is hidden in a language plugin and specific to method name parsing.
---
Patch is 23.75 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/91586.diff
10 Files Affected:
- (modified) lldb/include/lldb/Symbol/Type.h (+31-4)
- (modified) lldb/include/lldb/Symbol/TypeList.h (-9)
- (modified) lldb/include/lldb/Symbol/TypeMap.h (-4)
- (modified) lldb/source/Symbol/Type.cpp (+59-71)
- (modified) lldb/source/Symbol/TypeList.cpp (-109)
- (modified) lldb/source/Symbol/TypeMap.cpp (-73)
- (added) lldb/test/API/python_api/sbmodule/FindTypes/Makefile (+3)
- (added) lldb/test/API/python_api/sbmodule/FindTypes/TestSBModuleFindTypes.py (+40)
- (added) lldb/test/API/python_api/sbmodule/FindTypes/main.cpp (+17)
- (modified) lldb/unittests/Symbol/TestType.cpp (+30-32)
``````````diff
diff --git a/lldb/include/lldb/Symbol/Type.h b/lldb/include/lldb/Symbol/Type.h
index 1c4f7b5601b0c..4194639dcfd2a 100644
--- a/lldb/include/lldb/Symbol/Type.h
+++ b/lldb/include/lldb/Symbol/Type.h
@@ -21,6 +21,8 @@
#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/STLForwardCompat.h"
+#include "llvm/Support/raw_ostream.h"
#include <optional>
#include <set>
@@ -492,12 +494,37 @@ class Type : public std::enable_shared_from_this<Type>, public UserID {
static int Compare(const Type &a, const Type &b);
+ // Represents a parsed type name coming out of GetTypeScopeAndBasename. The
+ // structure holds StringRefs pointing to portions of the original name, and
+ // so most not be used after the name is destroyed.
+ struct ParsedName {
+ lldb::TypeClass type_class = lldb::eTypeClassAny;
+
+ // Scopes of the type, starting with the outermost. Absolute type references
+ // have a "::" as the first scope.
+ llvm::SmallVector<llvm::StringRef> scope;
+
+ llvm::StringRef basename;
+
+ friend bool operator==(const ParsedName &lhs, const ParsedName &rhs) {
+ return lhs.type_class == rhs.type_class && lhs.scope == rhs.scope &&
+ lhs.basename == rhs.basename;
+ }
+
+ friend llvm::raw_ostream &operator<<(llvm::raw_ostream &os,
+ const ParsedName &name) {
+ return os << llvm::formatv(
+ "Type::ParsedName({0:x}, [{1}], {2})",
+ llvm::to_underlying(name.type_class),
+ llvm::make_range(name.scope.begin(), name.scope.end()),
+ name.basename);
+ }
+ };
// From a fully qualified typename, split the type into the type basename and
// the remaining type scope (namespaces/classes).
- static bool GetTypeScopeAndBasename(llvm::StringRef name,
- llvm::StringRef &scope,
- llvm::StringRef &basename,
- lldb::TypeClass &type_class);
+ static std::optional<ParsedName>
+ GetTypeScopeAndBasename(llvm::StringRef name);
+
void SetEncodingType(Type *encoding_type) { m_encoding_type = encoding_type; }
uint32_t GetEncodingMask();
diff --git a/lldb/include/lldb/Symbol/TypeList.h b/lldb/include/lldb/Symbol/TypeList.h
index 403469c989f58..d58772ad5b62e 100644
--- a/lldb/include/lldb/Symbol/TypeList.h
+++ b/lldb/include/lldb/Symbol/TypeList.h
@@ -49,15 +49,6 @@ class TypeList {
void ForEach(std::function<bool(lldb::TypeSP &type_sp)> const &callback);
- void RemoveMismatchedTypes(llvm::StringRef qualified_typename,
- bool exact_match);
-
- void RemoveMismatchedTypes(llvm::StringRef type_scope,
- llvm::StringRef type_basename,
- lldb::TypeClass type_class, bool exact_match);
-
- void RemoveMismatchedTypes(lldb::TypeClass type_class);
-
private:
typedef collection::iterator iterator;
typedef collection::const_iterator const_iterator;
diff --git a/lldb/include/lldb/Symbol/TypeMap.h b/lldb/include/lldb/Symbol/TypeMap.h
index 433711875e553..89011efab5c31 100644
--- a/lldb/include/lldb/Symbol/TypeMap.h
+++ b/lldb/include/lldb/Symbol/TypeMap.h
@@ -55,10 +55,6 @@ class TypeMap {
bool Remove(const lldb::TypeSP &type_sp);
- void RemoveMismatchedTypes(llvm::StringRef type_scope,
- llvm::StringRef type_basename,
- lldb::TypeClass type_class, bool exact_match);
-
private:
typedef collection::iterator iterator;
typedef collection::const_iterator const_iterator;
diff --git a/lldb/source/Symbol/Type.cpp b/lldb/source/Symbol/Type.cpp
index b85c38097ebe2..6bf69c2ded287 100644
--- a/lldb/source/Symbol/Type.cpp
+++ b/lldb/source/Symbol/Type.cpp
@@ -29,6 +29,7 @@
#include "lldb/Target/ExecutionContext.h"
#include "lldb/Target/Process.h"
#include "lldb/Target/Target.h"
+#include "lldb/lldb-enumerations.h"
#include "llvm/ADT/StringRef.h"
@@ -85,27 +86,23 @@ static CompilerContextKind ConvertTypeClass(lldb::TypeClass type_class) {
TypeQuery::TypeQuery(llvm::StringRef name, TypeQueryOptions options)
: m_options(options) {
- llvm::StringRef scope, basename;
- lldb::TypeClass type_class = lldb::eTypeClassAny;
- if (Type::GetTypeScopeAndBasename(name, scope, basename, type_class)) {
- if (scope.consume_front("::"))
- m_options |= e_exact_match;
+ if (std::optional<Type::ParsedName> parsed_name =
+ Type::GetTypeScopeAndBasename(name)) {
+ llvm::ArrayRef scope = parsed_name->scope;
if (!scope.empty()) {
- std::pair<llvm::StringRef, llvm::StringRef> scope_pair =
- scope.split("::");
- while (!scope_pair.second.empty()) {
- m_context.push_back({CompilerContextKind::AnyDeclContext,
- ConstString(scope_pair.first.str())});
- scope_pair = scope_pair.second.split("::");
+ if (scope[0] == "::") {
+ m_options |= e_exact_match;
+ scope = scope.drop_front();
+ }
+ for (llvm::StringRef s : scope) {
+ m_context.push_back(
+ {CompilerContextKind::AnyDeclContext, ConstString(s)});
}
- m_context.push_back({CompilerContextKind::AnyDeclContext,
- ConstString(scope_pair.first.str())});
}
- m_context.push_back(
- {ConvertTypeClass(type_class), ConstString(basename.str())});
+ m_context.push_back({ConvertTypeClass(parsed_name->type_class),
+ ConstString(parsed_name->basename)});
} else {
- m_context.push_back(
- {CompilerContextKind::AnyType, ConstString(name.str())});
+ m_context.push_back({CompilerContextKind::AnyType, ConstString(name)});
}
}
@@ -773,65 +770,56 @@ ConstString Type::GetQualifiedName() {
return GetForwardCompilerType().GetTypeName();
}
-bool Type::GetTypeScopeAndBasename(llvm::StringRef name,
- llvm::StringRef &scope,
- llvm::StringRef &basename,
- TypeClass &type_class) {
- type_class = eTypeClassAny;
+std::optional<Type::ParsedName>
+Type::GetTypeScopeAndBasename(llvm::StringRef name) {
+ ParsedName result;
if (name.empty())
- return false;
-
- // Clear the scope in case we have just a type class and a basename.
- scope = llvm::StringRef();
- basename = name;
- if (basename.consume_front("struct "))
- type_class = eTypeClassStruct;
- else if (basename.consume_front("class "))
- type_class = eTypeClassClass;
- else if (basename.consume_front("union "))
- type_class = eTypeClassUnion;
- else if (basename.consume_front("enum "))
- type_class = eTypeClassEnumeration;
- else if (basename.consume_front("typedef "))
- type_class = eTypeClassTypedef;
-
- size_t namespace_separator = basename.find("::");
- if (namespace_separator == llvm::StringRef::npos) {
- // If "name" started a type class we need to return true with no scope.
- return type_class != eTypeClassAny;
- }
-
- size_t template_begin = basename.find('<');
- while (namespace_separator != llvm::StringRef::npos) {
- if (template_begin != llvm::StringRef::npos &&
- namespace_separator > template_begin) {
- size_t template_depth = 1;
- llvm::StringRef template_arg =
- basename.drop_front(template_begin + 1);
- while (template_depth > 0 && !template_arg.empty()) {
- if (template_arg.front() == '<')
- template_depth++;
- else if (template_arg.front() == '>')
- template_depth--;
- template_arg = template_arg.drop_front(1);
+ return std::nullopt;
+
+ if (name.consume_front("struct "))
+ result.type_class = eTypeClassStruct;
+ else if (name.consume_front("class "))
+ result.type_class = eTypeClassClass;
+ else if (name.consume_front("union "))
+ result.type_class = eTypeClassUnion;
+ else if (name.consume_front("enum "))
+ result.type_class = eTypeClassEnumeration;
+ else if (name.consume_front("typedef "))
+ result.type_class = eTypeClassTypedef;
+
+ if (name.consume_front("::"))
+ result.scope.push_back("::");
+
+ bool prev_is_colon = false;
+ size_t template_depth = 0;
+ size_t name_begin = 0;
+ for (const auto &pos : llvm::enumerate(name)) {
+ switch (pos.value()) {
+ case ':':
+ if (prev_is_colon && template_depth == 0) {
+ result.scope.push_back(name.slice(name_begin, pos.index() - 1));
+ name_begin = pos.index() + 1;
}
- if (template_depth != 0)
- return false; // We have an invalid type name. Bail out.
- if (template_arg.empty())
- break; // The template ends at the end of the full name.
- basename = template_arg;
- } else {
- basename = basename.drop_front(namespace_separator + 2);
+ break;
+ case '<':
+ ++template_depth;
+ break;
+ case '>':
+ if (template_depth == 0)
+ return std::nullopt; // Invalid name.
+ --template_depth;
+ break;
}
- template_begin = basename.find('<');
- namespace_separator = basename.find("::");
- }
- if (basename.size() < name.size()) {
- scope = name.take_front(name.size() - basename.size());
- return true;
+ prev_is_colon = pos.value() == ':';
}
- return false;
+
+ if (name_begin < name.size() && template_depth == 0)
+ result.basename = name.substr(name_begin);
+ else
+ return std::nullopt;
+
+ return result;
}
ModuleSP Type::GetModule() {
diff --git a/lldb/source/Symbol/TypeList.cpp b/lldb/source/Symbol/TypeList.cpp
index 2e101e0a8f574..5748871893157 100644
--- a/lldb/source/Symbol/TypeList.cpp
+++ b/lldb/source/Symbol/TypeList.cpp
@@ -96,112 +96,3 @@ void TypeList::Dump(Stream *s, bool show_context) {
if (Type *t = pos->get())
t->Dump(s, show_context);
}
-
-void TypeList::RemoveMismatchedTypes(llvm::StringRef qualified_typename,
- bool exact_match) {
- llvm::StringRef type_scope;
- llvm::StringRef type_basename;
- TypeClass type_class = eTypeClassAny;
- if (!Type::GetTypeScopeAndBasename(qualified_typename, type_scope,
- type_basename, type_class)) {
- type_basename = qualified_typename;
- type_scope = "";
- }
- return RemoveMismatchedTypes(type_scope, type_basename, type_class,
- exact_match);
-}
-
-void TypeList::RemoveMismatchedTypes(llvm::StringRef type_scope,
- llvm::StringRef type_basename,
- TypeClass type_class, bool exact_match) {
- // Our "collection" type currently is a std::map which doesn't have any good
- // way to iterate and remove items from the map so we currently just make a
- // new list and add all of the matching types to it, and then swap it into
- // m_types at the end
- collection matching_types;
-
- iterator pos, end = m_types.end();
-
- for (pos = m_types.begin(); pos != end; ++pos) {
- Type *the_type = pos->get();
- bool keep_match = false;
- TypeClass match_type_class = eTypeClassAny;
-
- if (type_class != eTypeClassAny) {
- match_type_class = the_type->GetForwardCompilerType().GetTypeClass();
- if ((match_type_class & type_class) == 0)
- continue;
- }
-
- ConstString match_type_name_const_str(the_type->GetQualifiedName());
- if (match_type_name_const_str) {
- const char *match_type_name = match_type_name_const_str.GetCString();
- llvm::StringRef match_type_scope;
- llvm::StringRef match_type_basename;
- if (Type::GetTypeScopeAndBasename(match_type_name, match_type_scope,
- match_type_basename,
- match_type_class)) {
- if (match_type_basename == type_basename) {
- const size_t type_scope_size = type_scope.size();
- const size_t match_type_scope_size = match_type_scope.size();
- if (exact_match || (type_scope_size == match_type_scope_size)) {
- keep_match = match_type_scope == type_scope;
- } else {
- if (match_type_scope_size > type_scope_size) {
- const size_t type_scope_pos = match_type_scope.rfind(type_scope);
- if (type_scope_pos == match_type_scope_size - type_scope_size) {
- if (type_scope_pos >= 2) {
- // Our match scope ends with the type scope we were looking
- // for, but we need to make sure what comes before the
- // matching type scope is a namespace boundary in case we are
- // trying to match: type_basename = "d" type_scope = "b::c::"
- // We want to match:
- // match_type_scope "a::b::c::"
- // But not:
- // match_type_scope "a::bb::c::"
- // So below we make sure what comes before "b::c::" in
- // match_type_scope is "::", or the namespace boundary
- if (match_type_scope[type_scope_pos - 1] == ':' &&
- match_type_scope[type_scope_pos - 2] == ':') {
- keep_match = true;
- }
- }
- }
- }
- }
- }
- } else {
- // The type we are currently looking at doesn't exists in a namespace
- // or class, so it only matches if there is no type scope...
- keep_match = type_scope.empty() && type_basename == match_type_name;
- }
- }
-
- if (keep_match) {
- matching_types.push_back(*pos);
- }
- }
- m_types.swap(matching_types);
-}
-
-void TypeList::RemoveMismatchedTypes(TypeClass type_class) {
- if (type_class == eTypeClassAny)
- return;
-
- // Our "collection" type currently is a std::map which doesn't have any good
- // way to iterate and remove items from the map so we currently just make a
- // new list and add all of the matching types to it, and then swap it into
- // m_types at the end
- collection matching_types;
-
- iterator pos, end = m_types.end();
-
- for (pos = m_types.begin(); pos != end; ++pos) {
- Type *the_type = pos->get();
- TypeClass match_type_class =
- the_type->GetForwardCompilerType().GetTypeClass();
- if (match_type_class & type_class)
- matching_types.push_back(*pos);
- }
- m_types.swap(matching_types);
-}
diff --git a/lldb/source/Symbol/TypeMap.cpp b/lldb/source/Symbol/TypeMap.cpp
index 8933de53749cc..9d7c05f318a1d 100644
--- a/lldb/source/Symbol/TypeMap.cpp
+++ b/lldb/source/Symbol/TypeMap.cpp
@@ -132,76 +132,3 @@ void TypeMap::Dump(Stream *s, bool show_context,
for (const auto &pair : m_types)
pair.second->Dump(s, show_context, level);
}
-
-void TypeMap::RemoveMismatchedTypes(llvm::StringRef type_scope,
- llvm::StringRef type_basename,
- TypeClass type_class, bool exact_match) {
- // Our "collection" type currently is a std::map which doesn't have any good
- // way to iterate and remove items from the map so we currently just make a
- // new list and add all of the matching types to it, and then swap it into
- // m_types at the end
- collection matching_types;
-
- iterator pos, end = m_types.end();
-
- for (pos = m_types.begin(); pos != end; ++pos) {
- Type *the_type = pos->second.get();
- bool keep_match = false;
- TypeClass match_type_class = eTypeClassAny;
-
- if (type_class != eTypeClassAny) {
- match_type_class = the_type->GetForwardCompilerType().GetTypeClass();
- if ((match_type_class & type_class) == 0)
- continue;
- }
-
- ConstString match_type_name_const_str(the_type->GetQualifiedName());
- if (match_type_name_const_str) {
- const char *match_type_name = match_type_name_const_str.GetCString();
- llvm::StringRef match_type_scope;
- llvm::StringRef match_type_basename;
- if (Type::GetTypeScopeAndBasename(match_type_name, match_type_scope,
- match_type_basename,
- match_type_class)) {
- if (match_type_basename == type_basename) {
- const size_t type_scope_size = type_scope.size();
- const size_t match_type_scope_size = match_type_scope.size();
- if (exact_match || (type_scope_size == match_type_scope_size)) {
- keep_match = match_type_scope == type_scope;
- } else {
- if (match_type_scope_size > type_scope_size) {
- const size_t type_scope_pos = match_type_scope.rfind(type_scope);
- if (type_scope_pos == match_type_scope_size - type_scope_size) {
- if (type_scope_pos >= 2) {
- // Our match scope ends with the type scope we were looking
- // for, but we need to make sure what comes before the
- // matching type scope is a namespace boundary in case we are
- // trying to match: type_basename = "d" type_scope = "b::c::"
- // We want to match:
- // match_type_scope "a::b::c::"
- // But not:
- // match_type_scope "a::bb::c::"
- // So below we make sure what comes before "b::c::" in
- // match_type_scope is "::", or the namespace boundary
- if (match_type_scope[type_scope_pos - 1] == ':' &&
- match_type_scope[type_scope_pos - 2] == ':') {
- keep_match = true;
- }
- }
- }
- }
- }
- }
- } else {
- // The type we are currently looking at doesn't exists in a namespace
- // or class, so it only matches if there is no type scope...
- keep_match = type_scope.empty() && type_basename == match_type_name;
- }
- }
-
- if (keep_match) {
- matching_types.insert(*pos);
- }
- }
- m_types.swap(matching_types);
-}
diff --git a/lldb/test/API/python_api/sbmodule/FindTypes/Makefile b/lldb/test/API/python_api/sbmodule/FindTypes/Makefile
new file mode 100644
index 0000000000000..99998b20bcb05
--- /dev/null
+++ b/lldb/test/API/python_api/sbmodule/FindTypes/Makefile
@@ -0,0 +1,3 @@
+CXX_SOURCES := main.cpp
+
+include Makefile.rules
diff --git a/lldb/test/API/python_api/sbmodule/FindTypes/TestSBModuleFindTypes.py b/lldb/test/API/python_api/sbmodule/FindTypes/TestSBModuleFindTypes.py
new file mode 100644
index 0000000000000..5c3d2b4187ddf
--- /dev/null
+++ b/lldb/test/API/python_api/sbmodule/FindTypes/TestSBModuleFindTypes.py
@@ -0,0 +1,40 @@
+"""Test the SBModule::FindTypes."""
+
+import lldb
+from lldbsuite.test.decorators import *
+from lldbsuite.test.lldbtest import *
+from lldbsuite.test import lldbutil
+
+
+class TestSBModuleFindTypes(TestBase):
+ def test_lookup_in_template_scopes(self):
+ self.build()
+ spec = lldb.SBModuleSpec()
+ spec.SetFileSpec(lldb.SBFileSpec(self.getBuildArtifact()))
+ module = lldb.SBModule(spec)
+
+ self.assertEqual(
+ set([t.GetName() for t in module.FindTypes("LookMeUp")]),
+ set(
+ [
+ "ns1::Foo<void>::LookMeUp",
+ "ns2::Bar<void>::LookMeUp",
+ "ns1::Foo<ns2::Bar<void> >::LookMeUp",
+ ]
+ ),
+ )
+
+ self.assertEqual(
+ set([t.GetName() for t in module.FindTypes("ns1::Foo<void>::LookMeUp")]),
+ set(["ns1::Foo<void>::LookMeUp"]),
+ )
+
+ self.assertEqual(
+ set(
+ [
+ t.GetName()
+ for t in module.FindTypes("ns1::Foo<ns2::Bar<void> >::LookMeUp")
+ ]
+ ),
+ set(["ns1::Foo<ns2::Bar<void> >::LookMeUp"]),
+ )
diff --git a/lldb/test/API/python_api/sbmodule/FindTypes/main.cpp b/lldb/test/API/python_api/sbmodule/FindTypes/main.cpp
new file mode 100644
index 0000000000000..cb2646ce312a9
--- /dev/null
+++ b/lldb/test/API/python_api/sbmodule/FindTypes/main.cpp
@@ -0,0 +1,17 @@
+namespace ns1 {
+template <typename T> struct Foo {
+ struct LookMeUp {};
+};
+} // namespace ns1
+
+namespace ns2 {
+template <typename T> struct Bar {
+ struct LookMeUp {};
+};
+} // namespace ns2
+
+ns1::Foo<void>::LookM...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/91586
More information about the lldb-commits
mailing list