[Lldb-commits] [lldb] [lldb] Improve type name parsing (PR #91586)

via lldb-commits lldb-commits at lists.llvm.org
Thu May 9 05:43:11 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-lldb

Author: Pavel Labath (labath)

<details>
<summary>Changes</summary>

Parsing of '::' scopes in TypeQuery was very naive and failed for names with '::''s in template arguments. Interestingly, one of the functions it was calling (Type::GetTypeScopeAndBasename) was already doing the same thing, and getting it (mostly (*)) right. This refactors the function so that it can return the scope results, fixing the parsing of names like std::vector<int, std::allocator<int>>::iterator.

Two callers of GetTypeScopeAndBasename are deleted as the functions are not used (I presume they stopped being used once we started pruning type search results more eagerly).

(*) This implementation is still not correct when one takes c++ operators into account -- e.g., something like `X<&A::operator<>::T` is a legitimate type name. We do have an implementation that is able to handle names like these (CPlusPlusLanguage::MethodName), but using it is not trivial, because it is hidden in a language plugin and specific to method name parsing.

---

Patch is 23.75 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/91586.diff


10 Files Affected:

- (modified) lldb/include/lldb/Symbol/Type.h (+31-4) 
- (modified) lldb/include/lldb/Symbol/TypeList.h (-9) 
- (modified) lldb/include/lldb/Symbol/TypeMap.h (-4) 
- (modified) lldb/source/Symbol/Type.cpp (+59-71) 
- (modified) lldb/source/Symbol/TypeList.cpp (-109) 
- (modified) lldb/source/Symbol/TypeMap.cpp (-73) 
- (added) lldb/test/API/python_api/sbmodule/FindTypes/Makefile (+3) 
- (added) lldb/test/API/python_api/sbmodule/FindTypes/TestSBModuleFindTypes.py (+40) 
- (added) lldb/test/API/python_api/sbmodule/FindTypes/main.cpp (+17) 
- (modified) lldb/unittests/Symbol/TestType.cpp (+30-32) 


``````````diff
diff --git a/lldb/include/lldb/Symbol/Type.h b/lldb/include/lldb/Symbol/Type.h
index 1c4f7b5601b0c..4194639dcfd2a 100644
--- a/lldb/include/lldb/Symbol/Type.h
+++ b/lldb/include/lldb/Symbol/Type.h
@@ -21,6 +21,8 @@
 
 #include "llvm/ADT/APSInt.h"
 #include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/STLForwardCompat.h"
+#include "llvm/Support/raw_ostream.h"
 
 #include <optional>
 #include <set>
@@ -492,12 +494,37 @@ class Type : public std::enable_shared_from_this<Type>, public UserID {
 
   static int Compare(const Type &a, const Type &b);
 
+  // Represents a parsed type name coming out of GetTypeScopeAndBasename. The
+  // structure holds StringRefs pointing to portions of the original name, and
+  // so most not be used after the name is destroyed.
+  struct ParsedName {
+    lldb::TypeClass type_class = lldb::eTypeClassAny;
+
+    // Scopes of the type, starting with the outermost. Absolute type references
+    // have a "::" as the first scope.
+    llvm::SmallVector<llvm::StringRef> scope;
+
+    llvm::StringRef basename;
+
+    friend bool operator==(const ParsedName &lhs, const ParsedName &rhs) {
+      return lhs.type_class == rhs.type_class && lhs.scope == rhs.scope &&
+             lhs.basename == rhs.basename;
+    }
+
+    friend llvm::raw_ostream &operator<<(llvm::raw_ostream &os,
+                                         const ParsedName &name) {
+      return os << llvm::formatv(
+                 "Type::ParsedName({0:x}, [{1}], {2})",
+                 llvm::to_underlying(name.type_class),
+                 llvm::make_range(name.scope.begin(), name.scope.end()),
+                 name.basename);
+    }
+  };
   // From a fully qualified typename, split the type into the type basename and
   // the remaining type scope (namespaces/classes).
-  static bool GetTypeScopeAndBasename(llvm::StringRef name,
-                                      llvm::StringRef &scope,
-                                      llvm::StringRef &basename,
-                                      lldb::TypeClass &type_class);
+  static std::optional<ParsedName>
+  GetTypeScopeAndBasename(llvm::StringRef name);
+
   void SetEncodingType(Type *encoding_type) { m_encoding_type = encoding_type; }
 
   uint32_t GetEncodingMask();
diff --git a/lldb/include/lldb/Symbol/TypeList.h b/lldb/include/lldb/Symbol/TypeList.h
index 403469c989f58..d58772ad5b62e 100644
--- a/lldb/include/lldb/Symbol/TypeList.h
+++ b/lldb/include/lldb/Symbol/TypeList.h
@@ -49,15 +49,6 @@ class TypeList {
 
   void ForEach(std::function<bool(lldb::TypeSP &type_sp)> const &callback);
 
-  void RemoveMismatchedTypes(llvm::StringRef qualified_typename,
-                             bool exact_match);
-
-  void RemoveMismatchedTypes(llvm::StringRef type_scope,
-                             llvm::StringRef type_basename,
-                             lldb::TypeClass type_class, bool exact_match);
-
-  void RemoveMismatchedTypes(lldb::TypeClass type_class);
-
 private:
   typedef collection::iterator iterator;
   typedef collection::const_iterator const_iterator;
diff --git a/lldb/include/lldb/Symbol/TypeMap.h b/lldb/include/lldb/Symbol/TypeMap.h
index 433711875e553..89011efab5c31 100644
--- a/lldb/include/lldb/Symbol/TypeMap.h
+++ b/lldb/include/lldb/Symbol/TypeMap.h
@@ -55,10 +55,6 @@ class TypeMap {
 
   bool Remove(const lldb::TypeSP &type_sp);
 
-  void RemoveMismatchedTypes(llvm::StringRef type_scope,
-                             llvm::StringRef type_basename,
-                             lldb::TypeClass type_class, bool exact_match);
-
 private:
   typedef collection::iterator iterator;
   typedef collection::const_iterator const_iterator;
diff --git a/lldb/source/Symbol/Type.cpp b/lldb/source/Symbol/Type.cpp
index b85c38097ebe2..6bf69c2ded287 100644
--- a/lldb/source/Symbol/Type.cpp
+++ b/lldb/source/Symbol/Type.cpp
@@ -29,6 +29,7 @@
 #include "lldb/Target/ExecutionContext.h"
 #include "lldb/Target/Process.h"
 #include "lldb/Target/Target.h"
+#include "lldb/lldb-enumerations.h"
 
 #include "llvm/ADT/StringRef.h"
 
@@ -85,27 +86,23 @@ static CompilerContextKind ConvertTypeClass(lldb::TypeClass type_class) {
 
 TypeQuery::TypeQuery(llvm::StringRef name, TypeQueryOptions options)
     : m_options(options) {
-  llvm::StringRef scope, basename;
-  lldb::TypeClass type_class = lldb::eTypeClassAny;
-  if (Type::GetTypeScopeAndBasename(name, scope, basename, type_class)) {
-    if (scope.consume_front("::"))
-      m_options |= e_exact_match;
+  if (std::optional<Type::ParsedName> parsed_name =
+          Type::GetTypeScopeAndBasename(name)) {
+    llvm::ArrayRef scope = parsed_name->scope;
     if (!scope.empty()) {
-      std::pair<llvm::StringRef, llvm::StringRef> scope_pair =
-          scope.split("::");
-      while (!scope_pair.second.empty()) {
-        m_context.push_back({CompilerContextKind::AnyDeclContext,
-                             ConstString(scope_pair.first.str())});
-        scope_pair = scope_pair.second.split("::");
+      if (scope[0] == "::") {
+        m_options |= e_exact_match;
+        scope = scope.drop_front();
+      }
+      for (llvm::StringRef s : scope) {
+        m_context.push_back(
+            {CompilerContextKind::AnyDeclContext, ConstString(s)});
       }
-      m_context.push_back({CompilerContextKind::AnyDeclContext,
-                           ConstString(scope_pair.first.str())});
     }
-    m_context.push_back(
-        {ConvertTypeClass(type_class), ConstString(basename.str())});
+    m_context.push_back({ConvertTypeClass(parsed_name->type_class),
+                         ConstString(parsed_name->basename)});
   } else {
-    m_context.push_back(
-        {CompilerContextKind::AnyType, ConstString(name.str())});
+    m_context.push_back({CompilerContextKind::AnyType, ConstString(name)});
   }
 }
 
@@ -773,65 +770,56 @@ ConstString Type::GetQualifiedName() {
   return GetForwardCompilerType().GetTypeName();
 }
 
-bool Type::GetTypeScopeAndBasename(llvm::StringRef name,
-                                   llvm::StringRef &scope,
-                                   llvm::StringRef &basename,
-                                   TypeClass &type_class) {
-  type_class = eTypeClassAny;
+std::optional<Type::ParsedName>
+Type::GetTypeScopeAndBasename(llvm::StringRef name) {
+  ParsedName result;
 
   if (name.empty())
-    return false;
-
-  // Clear the scope in case we have just a type class and a basename.
-  scope = llvm::StringRef();
-  basename = name;
-  if (basename.consume_front("struct "))
-    type_class = eTypeClassStruct;
-  else if (basename.consume_front("class "))
-    type_class = eTypeClassClass;
-  else if (basename.consume_front("union "))
-    type_class = eTypeClassUnion;
-  else if (basename.consume_front("enum "))
-    type_class = eTypeClassEnumeration;
-  else if (basename.consume_front("typedef "))
-    type_class = eTypeClassTypedef;
-
-  size_t namespace_separator = basename.find("::");
-  if (namespace_separator == llvm::StringRef::npos) {
-    // If "name" started a type class we need to return true with no scope.
-    return type_class != eTypeClassAny;
-  }
-
-  size_t template_begin = basename.find('<');
-  while (namespace_separator != llvm::StringRef::npos) {
-    if (template_begin != llvm::StringRef::npos &&
-        namespace_separator > template_begin) {
-      size_t template_depth = 1;
-      llvm::StringRef template_arg =
-          basename.drop_front(template_begin + 1);
-      while (template_depth > 0 && !template_arg.empty()) {
-        if (template_arg.front() == '<')
-          template_depth++;
-        else if (template_arg.front() == '>')
-          template_depth--;
-        template_arg = template_arg.drop_front(1);
+    return std::nullopt;
+
+  if (name.consume_front("struct "))
+    result.type_class = eTypeClassStruct;
+  else if (name.consume_front("class "))
+    result.type_class = eTypeClassClass;
+  else if (name.consume_front("union "))
+    result.type_class = eTypeClassUnion;
+  else if (name.consume_front("enum "))
+    result.type_class = eTypeClassEnumeration;
+  else if (name.consume_front("typedef "))
+    result.type_class = eTypeClassTypedef;
+
+  if (name.consume_front("::"))
+    result.scope.push_back("::");
+
+  bool prev_is_colon = false;
+  size_t template_depth = 0;
+  size_t name_begin = 0;
+  for (const auto &pos : llvm::enumerate(name)) {
+    switch (pos.value()) {
+    case ':':
+      if (prev_is_colon && template_depth == 0) {
+        result.scope.push_back(name.slice(name_begin, pos.index() - 1));
+        name_begin = pos.index() + 1;
       }
-      if (template_depth != 0)
-        return false; // We have an invalid type name. Bail out.
-      if (template_arg.empty())
-        break; // The template ends at the end of the full name.
-      basename = template_arg;
-    } else {
-      basename = basename.drop_front(namespace_separator + 2);
+      break;
+    case '<':
+      ++template_depth;
+      break;
+    case '>':
+      if (template_depth == 0)
+        return std::nullopt; // Invalid name.
+      --template_depth;
+      break;
     }
-    template_begin = basename.find('<');
-    namespace_separator = basename.find("::");
-  }
-  if (basename.size() < name.size()) {
-    scope = name.take_front(name.size() - basename.size());
-    return true;
+    prev_is_colon = pos.value() == ':';
   }
-  return false;
+
+  if (name_begin < name.size() && template_depth == 0)
+    result.basename = name.substr(name_begin);
+  else
+    return std::nullopt;
+
+  return result;
 }
 
 ModuleSP Type::GetModule() {
diff --git a/lldb/source/Symbol/TypeList.cpp b/lldb/source/Symbol/TypeList.cpp
index 2e101e0a8f574..5748871893157 100644
--- a/lldb/source/Symbol/TypeList.cpp
+++ b/lldb/source/Symbol/TypeList.cpp
@@ -96,112 +96,3 @@ void TypeList::Dump(Stream *s, bool show_context) {
     if (Type *t = pos->get())
       t->Dump(s, show_context);
 }
-
-void TypeList::RemoveMismatchedTypes(llvm::StringRef qualified_typename,
-                                     bool exact_match) {
-  llvm::StringRef type_scope;
-  llvm::StringRef type_basename;
-  TypeClass type_class = eTypeClassAny;
-  if (!Type::GetTypeScopeAndBasename(qualified_typename, type_scope,
-                                     type_basename, type_class)) {
-    type_basename = qualified_typename;
-    type_scope = "";
-  }
-  return RemoveMismatchedTypes(type_scope, type_basename, type_class,
-                               exact_match);
-}
-
-void TypeList::RemoveMismatchedTypes(llvm::StringRef type_scope,
-                                     llvm::StringRef type_basename,
-                                     TypeClass type_class, bool exact_match) {
-  // Our "collection" type currently is a std::map which doesn't have any good
-  // way to iterate and remove items from the map so we currently just make a
-  // new list and add all of the matching types to it, and then swap it into
-  // m_types at the end
-  collection matching_types;
-
-  iterator pos, end = m_types.end();
-
-  for (pos = m_types.begin(); pos != end; ++pos) {
-    Type *the_type = pos->get();
-    bool keep_match = false;
-    TypeClass match_type_class = eTypeClassAny;
-
-    if (type_class != eTypeClassAny) {
-      match_type_class = the_type->GetForwardCompilerType().GetTypeClass();
-      if ((match_type_class & type_class) == 0)
-        continue;
-    }
-
-    ConstString match_type_name_const_str(the_type->GetQualifiedName());
-    if (match_type_name_const_str) {
-      const char *match_type_name = match_type_name_const_str.GetCString();
-      llvm::StringRef match_type_scope;
-      llvm::StringRef match_type_basename;
-      if (Type::GetTypeScopeAndBasename(match_type_name, match_type_scope,
-                                        match_type_basename,
-                                        match_type_class)) {
-        if (match_type_basename == type_basename) {
-          const size_t type_scope_size = type_scope.size();
-          const size_t match_type_scope_size = match_type_scope.size();
-          if (exact_match || (type_scope_size == match_type_scope_size)) {
-            keep_match = match_type_scope == type_scope;
-          } else {
-            if (match_type_scope_size > type_scope_size) {
-              const size_t type_scope_pos = match_type_scope.rfind(type_scope);
-              if (type_scope_pos == match_type_scope_size - type_scope_size) {
-                if (type_scope_pos >= 2) {
-                  // Our match scope ends with the type scope we were looking
-                  // for, but we need to make sure what comes before the
-                  // matching type scope is a namespace boundary in case we are
-                  // trying to match: type_basename = "d" type_scope = "b::c::"
-                  // We want to match:
-                  //  match_type_scope "a::b::c::"
-                  // But not:
-                  //  match_type_scope "a::bb::c::"
-                  // So below we make sure what comes before "b::c::" in
-                  // match_type_scope is "::", or the namespace boundary
-                  if (match_type_scope[type_scope_pos - 1] == ':' &&
-                      match_type_scope[type_scope_pos - 2] == ':') {
-                    keep_match = true;
-                  }
-                }
-              }
-            }
-          }
-        }
-      } else {
-        // The type we are currently looking at doesn't exists in a namespace
-        // or class, so it only matches if there is no type scope...
-        keep_match = type_scope.empty() && type_basename == match_type_name;
-      }
-    }
-
-    if (keep_match) {
-      matching_types.push_back(*pos);
-    }
-  }
-  m_types.swap(matching_types);
-}
-
-void TypeList::RemoveMismatchedTypes(TypeClass type_class) {
-  if (type_class == eTypeClassAny)
-    return;
-
-  // Our "collection" type currently is a std::map which doesn't have any good
-  // way to iterate and remove items from the map so we currently just make a
-  // new list and add all of the matching types to it, and then swap it into
-  // m_types at the end
-  collection matching_types;
-
-  iterator pos, end = m_types.end();
-
-  for (pos = m_types.begin(); pos != end; ++pos) {
-    Type *the_type = pos->get();
-    TypeClass match_type_class =
-        the_type->GetForwardCompilerType().GetTypeClass();
-    if (match_type_class & type_class)
-      matching_types.push_back(*pos);
-  }
-  m_types.swap(matching_types);
-}
diff --git a/lldb/source/Symbol/TypeMap.cpp b/lldb/source/Symbol/TypeMap.cpp
index 8933de53749cc..9d7c05f318a1d 100644
--- a/lldb/source/Symbol/TypeMap.cpp
+++ b/lldb/source/Symbol/TypeMap.cpp
@@ -132,76 +132,3 @@ void TypeMap::Dump(Stream *s, bool show_context,
   for (const auto &pair : m_types)
     pair.second->Dump(s, show_context, level);
 }
-
-void TypeMap::RemoveMismatchedTypes(llvm::StringRef type_scope,
-                                    llvm::StringRef type_basename,
-                                    TypeClass type_class, bool exact_match) {
-  // Our "collection" type currently is a std::map which doesn't have any good
-  // way to iterate and remove items from the map so we currently just make a
-  // new list and add all of the matching types to it, and then swap it into
-  // m_types at the end
-  collection matching_types;
-
-  iterator pos, end = m_types.end();
-
-  for (pos = m_types.begin(); pos != end; ++pos) {
-    Type *the_type = pos->second.get();
-    bool keep_match = false;
-    TypeClass match_type_class = eTypeClassAny;
-
-    if (type_class != eTypeClassAny) {
-      match_type_class = the_type->GetForwardCompilerType().GetTypeClass();
-      if ((match_type_class & type_class) == 0)
-        continue;
-    }
-
-    ConstString match_type_name_const_str(the_type->GetQualifiedName());
-    if (match_type_name_const_str) {
-      const char *match_type_name = match_type_name_const_str.GetCString();
-      llvm::StringRef match_type_scope;
-      llvm::StringRef match_type_basename;
-      if (Type::GetTypeScopeAndBasename(match_type_name, match_type_scope,
-                                        match_type_basename,
-                                        match_type_class)) {
-        if (match_type_basename == type_basename) {
-          const size_t type_scope_size = type_scope.size();
-          const size_t match_type_scope_size = match_type_scope.size();
-          if (exact_match || (type_scope_size == match_type_scope_size)) {
-            keep_match = match_type_scope == type_scope;
-          } else {
-            if (match_type_scope_size > type_scope_size) {
-              const size_t type_scope_pos = match_type_scope.rfind(type_scope);
-              if (type_scope_pos == match_type_scope_size - type_scope_size) {
-                if (type_scope_pos >= 2) {
-                  // Our match scope ends with the type scope we were looking
-                  // for, but we need to make sure what comes before the
-                  // matching type scope is a namespace boundary in case we are
-                  // trying to match: type_basename = "d" type_scope = "b::c::"
-                  // We want to match:
-                  //  match_type_scope "a::b::c::"
-                  // But not:
-                  //  match_type_scope "a::bb::c::"
-                  // So below we make sure what comes before "b::c::" in
-                  // match_type_scope is "::", or the namespace boundary
-                  if (match_type_scope[type_scope_pos - 1] == ':' &&
-                      match_type_scope[type_scope_pos - 2] == ':') {
-                    keep_match = true;
-                  }
-                }
-              }
-            }
-          }
-        }
-      } else {
-        // The type we are currently looking at doesn't exists in a namespace
-        // or class, so it only matches if there is no type scope...
-        keep_match = type_scope.empty() && type_basename == match_type_name;
-      }
-    }
-
-    if (keep_match) {
-      matching_types.insert(*pos);
-    }
-  }
-  m_types.swap(matching_types);
-}
diff --git a/lldb/test/API/python_api/sbmodule/FindTypes/Makefile b/lldb/test/API/python_api/sbmodule/FindTypes/Makefile
new file mode 100644
index 0000000000000..99998b20bcb05
--- /dev/null
+++ b/lldb/test/API/python_api/sbmodule/FindTypes/Makefile
@@ -0,0 +1,3 @@
+CXX_SOURCES := main.cpp
+
+include Makefile.rules
diff --git a/lldb/test/API/python_api/sbmodule/FindTypes/TestSBModuleFindTypes.py b/lldb/test/API/python_api/sbmodule/FindTypes/TestSBModuleFindTypes.py
new file mode 100644
index 0000000000000..5c3d2b4187ddf
--- /dev/null
+++ b/lldb/test/API/python_api/sbmodule/FindTypes/TestSBModuleFindTypes.py
@@ -0,0 +1,40 @@
+"""Test the SBModule::FindTypes."""
+
+import lldb
+from lldbsuite.test.decorators import *
+from lldbsuite.test.lldbtest import *
+from lldbsuite.test import lldbutil
+
+
+class TestSBModuleFindTypes(TestBase):
+    def test_lookup_in_template_scopes(self):
+        self.build()
+        spec = lldb.SBModuleSpec()
+        spec.SetFileSpec(lldb.SBFileSpec(self.getBuildArtifact()))
+        module = lldb.SBModule(spec)
+
+        self.assertEqual(
+            set([t.GetName() for t in module.FindTypes("LookMeUp")]),
+            set(
+                [
+                    "ns1::Foo<void>::LookMeUp",
+                    "ns2::Bar<void>::LookMeUp",
+                    "ns1::Foo<ns2::Bar<void> >::LookMeUp",
+                ]
+            ),
+        )
+
+        self.assertEqual(
+            set([t.GetName() for t in module.FindTypes("ns1::Foo<void>::LookMeUp")]),
+            set(["ns1::Foo<void>::LookMeUp"]),
+        )
+
+        self.assertEqual(
+            set(
+                [
+                    t.GetName()
+                    for t in module.FindTypes("ns1::Foo<ns2::Bar<void> >::LookMeUp")
+                ]
+            ),
+            set(["ns1::Foo<ns2::Bar<void> >::LookMeUp"]),
+        )
diff --git a/lldb/test/API/python_api/sbmodule/FindTypes/main.cpp b/lldb/test/API/python_api/sbmodule/FindTypes/main.cpp
new file mode 100644
index 0000000000000..cb2646ce312a9
--- /dev/null
+++ b/lldb/test/API/python_api/sbmodule/FindTypes/main.cpp
@@ -0,0 +1,17 @@
+namespace ns1 {
+template <typename T> struct Foo {
+  struct LookMeUp {};
+};
+} // namespace ns1
+
+namespace ns2 {
+template <typename T> struct Bar {
+  struct LookMeUp {};
+};
+} // namespace ns2
+
+ns1::Foo<void>::LookM...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/91586


More information about the lldb-commits mailing list