[Lldb-commits] [lldb] [LLDB][NFC] Move CPlusPlusLanguage methods used in Core/Module.cpp to a separated module to break lldb-server dependencies (PR #132274)
Dmitry Vasilyev via lldb-commits
lldb-commits at lists.llvm.org
Thu Mar 20 12:11:35 PDT 2025
https://github.com/slydiman created https://github.com/llvm/llvm-project/pull/132274
This patch addresses the issue #129543.
lldb/source/Core/Module.cpp uses few static helpers and the class CPlusPlusLanguage::MethodName from the CPlusPlusLanguage plug-in. The CPlusPlusLanguage plug-in depends on other plug-ins. This causes many plugins linking, including TypeSystemClang and a lot of clang code.
After this patch the size of lldb-server is reduced by 9MB.
>From bbb28c99007ad48e38d458099acca848984f62fd Mon Sep 17 00:00:00 2001
From: Dmitry Vasilyev <dvassiliev at accesssoftek.com>
Date: Thu, 20 Mar 2025 21:50:51 +0400
Subject: [PATCH] [LLDB][NFC] Move CPlusPlusLanguage methods used in
Core/Module.cpp to a separated module to break lldb-server dependencies
This patch addresses the issue #129543.
lldb/source/Core/Module.cpp uses few static helpers and the class CPlusPlusLanguage::MethodName from the CPlusPlusLanguage plug-in.
The CPlusPlusLanguage plug-in depends on other plug-ins. This causes many plugins linking, including TypeSystemClang and a lot of clang code.
After this patch the size of lldb-server is reduced by 9MB.
---
.../Plugins/Language/CPlusPlus/CMakeLists.txt | 1 +
.../Language/CPlusPlus/CPlusPlusLanguage.cpp | 264 -----------------
.../CPlusPlus/CPlusPlusLanguageMethod.cpp | 279 ++++++++++++++++++
3 files changed, 280 insertions(+), 264 deletions(-)
create mode 100644 lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguageMethod.cpp
diff --git a/lldb/source/Plugins/Language/CPlusPlus/CMakeLists.txt b/lldb/source/Plugins/Language/CPlusPlus/CMakeLists.txt
index ccdc4d0ae99b3..5b866ee8edc02 100644
--- a/lldb/source/Plugins/Language/CPlusPlus/CMakeLists.txt
+++ b/lldb/source/Plugins/Language/CPlusPlus/CMakeLists.txt
@@ -2,6 +2,7 @@ add_lldb_library(lldbPluginCPlusPlusLanguage PLUGIN
BlockPointer.cpp
Coroutines.cpp
CPlusPlusLanguage.cpp
+ CPlusPlusLanguageMethod.cpp
CPlusPlusNameParser.cpp
CxxStringTypes.cpp
GenericBitset.cpp
diff --git a/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp b/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp
index 4b045d12ad494..2696fa87fafbf 100644
--- a/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp
+++ b/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp
@@ -36,7 +36,6 @@
#include "lldb/ValueObject/ValueObjectVariable.h"
#include "BlockPointer.h"
-#include "CPlusPlusNameParser.h"
#include "Coroutines.h"
#include "CxxStringTypes.h"
#include "Generic.h"
@@ -44,7 +43,6 @@
#include "LibCxxAtomic.h"
#include "LibCxxVariant.h"
#include "LibStdcpp.h"
-#include "MSVCUndecoratedNameParser.h"
#include "lldb/lldb-enumerations.h"
using namespace lldb;
@@ -106,74 +104,6 @@ Language *CPlusPlusLanguage::CreateInstance(lldb::LanguageType language) {
return nullptr;
}
-void CPlusPlusLanguage::MethodName::Clear() {
- m_full.Clear();
- m_basename = llvm::StringRef();
- m_context = llvm::StringRef();
- m_arguments = llvm::StringRef();
- m_qualifiers = llvm::StringRef();
- m_return_type = llvm::StringRef();
- m_parsed = false;
- m_parse_error = false;
-}
-
-static bool ReverseFindMatchingChars(const llvm::StringRef &s,
- const llvm::StringRef &left_right_chars,
- size_t &left_pos, size_t &right_pos,
- size_t pos = llvm::StringRef::npos) {
- assert(left_right_chars.size() == 2);
- left_pos = llvm::StringRef::npos;
- const char left_char = left_right_chars[0];
- const char right_char = left_right_chars[1];
- pos = s.find_last_of(left_right_chars, pos);
- if (pos == llvm::StringRef::npos || s[pos] == left_char)
- return false;
- right_pos = pos;
- uint32_t depth = 1;
- while (pos > 0 && depth > 0) {
- pos = s.find_last_of(left_right_chars, pos);
- if (pos == llvm::StringRef::npos)
- return false;
- if (s[pos] == left_char) {
- if (--depth == 0) {
- left_pos = pos;
- return left_pos < right_pos;
- }
- } else if (s[pos] == right_char) {
- ++depth;
- }
- }
- return false;
-}
-
-static bool IsTrivialBasename(const llvm::StringRef &basename) {
- // Check that the basename matches with the following regular expression
- // "^~?([A-Za-z_][A-Za-z_0-9]*)$" We are using a hand written implementation
- // because it is significantly more efficient then using the general purpose
- // regular expression library.
- size_t idx = 0;
- if (basename.starts_with('~'))
- idx = 1;
-
- if (basename.size() <= idx)
- return false; // Empty string or "~"
-
- if (!std::isalpha(basename[idx]) && basename[idx] != '_')
- return false; // First character (after removing the possible '~'') isn't in
- // [A-Za-z_]
-
- // Read all characters matching [A-Za-z_0-9]
- ++idx;
- while (idx < basename.size()) {
- if (!std::isalnum(basename[idx]) && basename[idx] != '_')
- break;
- ++idx;
- }
-
- // We processed all characters. It is a vaild basename.
- return idx == basename.size();
-}
-
/// Writes out the function name in 'full_name' to 'out_stream'
/// but replaces each argument type with the variable name
/// and the corresponding pretty-printed value
@@ -208,206 +138,12 @@ static bool PrettyPrintFunctionNameWithArgs(Stream &out_stream,
return true;
}
-bool CPlusPlusLanguage::MethodName::TrySimplifiedParse() {
- // This method tries to parse simple method definitions which are presumably
- // most comman in user programs. Definitions that can be parsed by this
- // function don't have return types and templates in the name.
- // A::B::C::fun(std::vector<T> &) const
- size_t arg_start, arg_end;
- llvm::StringRef full(m_full.GetCString());
- llvm::StringRef parens("()", 2);
- if (ReverseFindMatchingChars(full, parens, arg_start, arg_end)) {
- m_arguments = full.substr(arg_start, arg_end - arg_start + 1);
- if (arg_end + 1 < full.size())
- m_qualifiers = full.substr(arg_end + 1).ltrim();
-
- if (arg_start == 0)
- return false;
- size_t basename_end = arg_start;
- size_t context_start = 0;
- size_t context_end = full.rfind(':', basename_end);
- if (context_end == llvm::StringRef::npos)
- m_basename = full.substr(0, basename_end);
- else {
- if (context_start < context_end)
- m_context = full.substr(context_start, context_end - 1 - context_start);
- const size_t basename_begin = context_end + 1;
- m_basename = full.substr(basename_begin, basename_end - basename_begin);
- }
-
- if (IsTrivialBasename(m_basename)) {
- return true;
- } else {
- // The C++ basename doesn't match our regular expressions so this can't
- // be a valid C++ method, clear everything out and indicate an error
- m_context = llvm::StringRef();
- m_basename = llvm::StringRef();
- m_arguments = llvm::StringRef();
- m_qualifiers = llvm::StringRef();
- m_return_type = llvm::StringRef();
- return false;
- }
- }
- return false;
-}
-
-void CPlusPlusLanguage::MethodName::Parse() {
- if (!m_parsed && m_full) {
- if (TrySimplifiedParse()) {
- m_parse_error = false;
- } else {
- CPlusPlusNameParser parser(m_full.GetStringRef());
- if (auto function = parser.ParseAsFunctionDefinition()) {
- m_basename = function->name.basename;
- m_context = function->name.context;
- m_arguments = function->arguments;
- m_qualifiers = function->qualifiers;
- m_return_type = function->return_type;
- m_parse_error = false;
- } else {
- m_parse_error = true;
- }
- }
- m_parsed = true;
- }
-}
-
-llvm::StringRef CPlusPlusLanguage::MethodName::GetBasename() {
- if (!m_parsed)
- Parse();
- return m_basename;
-}
-
-llvm::StringRef CPlusPlusLanguage::MethodName::GetContext() {
- if (!m_parsed)
- Parse();
- return m_context;
-}
-
-llvm::StringRef CPlusPlusLanguage::MethodName::GetArguments() {
- if (!m_parsed)
- Parse();
- return m_arguments;
-}
-
-llvm::StringRef CPlusPlusLanguage::MethodName::GetQualifiers() {
- if (!m_parsed)
- Parse();
- return m_qualifiers;
-}
-
-llvm::StringRef CPlusPlusLanguage::MethodName::GetReturnType() {
- if (!m_parsed)
- Parse();
- return m_return_type;
-}
-
-std::string CPlusPlusLanguage::MethodName::GetScopeQualifiedName() {
- if (!m_parsed)
- Parse();
- if (m_context.empty())
- return std::string(m_basename);
-
- std::string res;
- res += m_context;
- res += "::";
- res += m_basename;
- return res;
-}
-
-llvm::StringRef
-CPlusPlusLanguage::MethodName::GetBasenameNoTemplateParameters() {
- llvm::StringRef basename = GetBasename();
- size_t arg_start, arg_end;
- llvm::StringRef parens("<>", 2);
- if (ReverseFindMatchingChars(basename, parens, arg_start, arg_end))
- return basename.substr(0, arg_start);
-
- return basename;
-}
-
-bool CPlusPlusLanguage::MethodName::ContainsPath(llvm::StringRef path) {
- if (!m_parsed)
- Parse();
-
- // If we can't parse the incoming name, then just check that it contains path.
- if (m_parse_error)
- return m_full.GetStringRef().contains(path);
-
- llvm::StringRef identifier;
- llvm::StringRef context;
- std::string path_str = path.str();
- bool success = CPlusPlusLanguage::ExtractContextAndIdentifier(
- path_str.c_str(), context, identifier);
- if (!success)
- return m_full.GetStringRef().contains(path);
-
- // Basename may include template arguments.
- // E.g.,
- // GetBaseName(): func<int>
- // identifier : func
- //
- // ...but we still want to account for identifiers with template parameter
- // lists, e.g., when users set breakpoints on template specializations.
- //
- // E.g.,
- // GetBaseName(): func<uint32_t>
- // identifier : func<int32_t*>
- //
- // Try to match the basename with or without template parameters.
- if (GetBasename() != identifier &&
- GetBasenameNoTemplateParameters() != identifier)
- return false;
-
- // Incoming path only had an identifier, so we match.
- if (context.empty())
- return true;
- // Incoming path has context but this method does not, no match.
- if (m_context.empty())
- return false;
-
- llvm::StringRef haystack = m_context;
- if (!haystack.consume_back(context))
- return false;
- if (haystack.empty() || !isalnum(haystack.back()))
- return true;
-
- return false;
-}
-
-bool CPlusPlusLanguage::IsCPPMangledName(llvm::StringRef name) {
- // FIXME!! we should really run through all the known C++ Language plugins
- // and ask each one if this is a C++ mangled name
-
- Mangled::ManglingScheme scheme = Mangled::GetManglingScheme(name);
-
- if (scheme == Mangled::eManglingSchemeNone)
- return false;
-
- return true;
-}
-
bool CPlusPlusLanguage::DemangledNameContainsPath(llvm::StringRef path,
ConstString demangled) const {
MethodName demangled_name(demangled);
return demangled_name.ContainsPath(path);
}
-bool CPlusPlusLanguage::ExtractContextAndIdentifier(
- const char *name, llvm::StringRef &context, llvm::StringRef &identifier) {
- if (MSVCUndecoratedNameParser::IsMSVCUndecoratedName(name))
- return MSVCUndecoratedNameParser::ExtractContextAndIdentifier(name, context,
- identifier);
-
- CPlusPlusNameParser parser(name);
- if (auto full_name = parser.ParseAsFullName()) {
- identifier = full_name->basename;
- context = full_name->context;
- return true;
- }
- return false;
-}
-
namespace {
class NodeAllocator {
llvm::BumpPtrAllocator Alloc;
diff --git a/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguageMethod.cpp b/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguageMethod.cpp
new file mode 100644
index 0000000000000..cd22b784de019
--- /dev/null
+++ b/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguageMethod.cpp
@@ -0,0 +1,279 @@
+//===-- CPlusPlusLanguageMethod.cpp ---------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "CPlusPlusLanguage.h"
+
+#include "lldb/Core/Mangled.h"
+
+#include "CPlusPlusNameParser.h"
+#include "MSVCUndecoratedNameParser.h"
+
+using namespace lldb;
+using namespace lldb_private;
+
+void CPlusPlusLanguage::MethodName::Clear() {
+ m_full.Clear();
+ m_basename = llvm::StringRef();
+ m_context = llvm::StringRef();
+ m_arguments = llvm::StringRef();
+ m_qualifiers = llvm::StringRef();
+ m_return_type = llvm::StringRef();
+ m_parsed = false;
+ m_parse_error = false;
+}
+
+static bool ReverseFindMatchingChars(const llvm::StringRef &s,
+ const llvm::StringRef &left_right_chars,
+ size_t &left_pos, size_t &right_pos,
+ size_t pos = llvm::StringRef::npos) {
+ assert(left_right_chars.size() == 2);
+ left_pos = llvm::StringRef::npos;
+ const char left_char = left_right_chars[0];
+ const char right_char = left_right_chars[1];
+ pos = s.find_last_of(left_right_chars, pos);
+ if (pos == llvm::StringRef::npos || s[pos] == left_char)
+ return false;
+ right_pos = pos;
+ uint32_t depth = 1;
+ while (pos > 0 && depth > 0) {
+ pos = s.find_last_of(left_right_chars, pos);
+ if (pos == llvm::StringRef::npos)
+ return false;
+ if (s[pos] == left_char) {
+ if (--depth == 0) {
+ left_pos = pos;
+ return left_pos < right_pos;
+ }
+ } else if (s[pos] == right_char) {
+ ++depth;
+ }
+ }
+ return false;
+}
+
+static bool IsTrivialBasename(const llvm::StringRef &basename) {
+ // Check that the basename matches with the following regular expression
+ // "^~?([A-Za-z_][A-Za-z_0-9]*)$" We are using a hand written implementation
+ // because it is significantly more efficient then using the general purpose
+ // regular expression library.
+ size_t idx = 0;
+ if (basename.starts_with('~'))
+ idx = 1;
+
+ if (basename.size() <= idx)
+ return false; // Empty string or "~"
+
+ if (!std::isalpha(basename[idx]) && basename[idx] != '_')
+ return false; // First character (after removing the possible '~'') isn't in
+ // [A-Za-z_]
+
+ // Read all characters matching [A-Za-z_0-9]
+ ++idx;
+ while (idx < basename.size()) {
+ if (!std::isalnum(basename[idx]) && basename[idx] != '_')
+ break;
+ ++idx;
+ }
+
+ // We processed all characters. It is a vaild basename.
+ return idx == basename.size();
+}
+
+bool CPlusPlusLanguage::MethodName::TrySimplifiedParse() {
+ // This method tries to parse simple method definitions which are presumably
+ // most comman in user programs. Definitions that can be parsed by this
+ // function don't have return types and templates in the name.
+ // A::B::C::fun(std::vector<T> &) const
+ size_t arg_start, arg_end;
+ llvm::StringRef full(m_full.GetCString());
+ llvm::StringRef parens("()", 2);
+ if (ReverseFindMatchingChars(full, parens, arg_start, arg_end)) {
+ m_arguments = full.substr(arg_start, arg_end - arg_start + 1);
+ if (arg_end + 1 < full.size())
+ m_qualifiers = full.substr(arg_end + 1).ltrim();
+
+ if (arg_start == 0)
+ return false;
+ size_t basename_end = arg_start;
+ size_t context_start = 0;
+ size_t context_end = full.rfind(':', basename_end);
+ if (context_end == llvm::StringRef::npos)
+ m_basename = full.substr(0, basename_end);
+ else {
+ if (context_start < context_end)
+ m_context = full.substr(context_start, context_end - 1 - context_start);
+ const size_t basename_begin = context_end + 1;
+ m_basename = full.substr(basename_begin, basename_end - basename_begin);
+ }
+
+ if (IsTrivialBasename(m_basename)) {
+ return true;
+ } else {
+ // The C++ basename doesn't match our regular expressions so this can't
+ // be a valid C++ method, clear everything out and indicate an error
+ m_context = llvm::StringRef();
+ m_basename = llvm::StringRef();
+ m_arguments = llvm::StringRef();
+ m_qualifiers = llvm::StringRef();
+ m_return_type = llvm::StringRef();
+ return false;
+ }
+ }
+ return false;
+}
+
+void CPlusPlusLanguage::MethodName::Parse() {
+ if (!m_parsed && m_full) {
+ if (TrySimplifiedParse()) {
+ m_parse_error = false;
+ } else {
+ CPlusPlusNameParser parser(m_full.GetStringRef());
+ if (auto function = parser.ParseAsFunctionDefinition()) {
+ m_basename = function->name.basename;
+ m_context = function->name.context;
+ m_arguments = function->arguments;
+ m_qualifiers = function->qualifiers;
+ m_return_type = function->return_type;
+ m_parse_error = false;
+ } else {
+ m_parse_error = true;
+ }
+ }
+ m_parsed = true;
+ }
+}
+
+llvm::StringRef CPlusPlusLanguage::MethodName::GetBasename() {
+ if (!m_parsed)
+ Parse();
+ return m_basename;
+}
+
+llvm::StringRef CPlusPlusLanguage::MethodName::GetContext() {
+ if (!m_parsed)
+ Parse();
+ return m_context;
+}
+
+llvm::StringRef CPlusPlusLanguage::MethodName::GetArguments() {
+ if (!m_parsed)
+ Parse();
+ return m_arguments;
+}
+
+llvm::StringRef CPlusPlusLanguage::MethodName::GetQualifiers() {
+ if (!m_parsed)
+ Parse();
+ return m_qualifiers;
+}
+
+llvm::StringRef CPlusPlusLanguage::MethodName::GetReturnType() {
+ if (!m_parsed)
+ Parse();
+ return m_return_type;
+}
+
+std::string CPlusPlusLanguage::MethodName::GetScopeQualifiedName() {
+ if (!m_parsed)
+ Parse();
+ if (m_context.empty())
+ return std::string(m_basename);
+
+ std::string res;
+ res += m_context;
+ res += "::";
+ res += m_basename;
+ return res;
+}
+
+llvm::StringRef
+CPlusPlusLanguage::MethodName::GetBasenameNoTemplateParameters() {
+ llvm::StringRef basename = GetBasename();
+ size_t arg_start, arg_end;
+ llvm::StringRef parens("<>", 2);
+ if (ReverseFindMatchingChars(basename, parens, arg_start, arg_end))
+ return basename.substr(0, arg_start);
+
+ return basename;
+}
+
+bool CPlusPlusLanguage::MethodName::ContainsPath(llvm::StringRef path) {
+ if (!m_parsed)
+ Parse();
+
+ // If we can't parse the incoming name, then just check that it contains path.
+ if (m_parse_error)
+ return m_full.GetStringRef().contains(path);
+
+ llvm::StringRef identifier;
+ llvm::StringRef context;
+ std::string path_str = path.str();
+ bool success = CPlusPlusLanguage::ExtractContextAndIdentifier(
+ path_str.c_str(), context, identifier);
+ if (!success)
+ return m_full.GetStringRef().contains(path);
+
+ // Basename may include template arguments.
+ // E.g.,
+ // GetBaseName(): func<int>
+ // identifier : func
+ //
+ // ...but we still want to account for identifiers with template parameter
+ // lists, e.g., when users set breakpoints on template specializations.
+ //
+ // E.g.,
+ // GetBaseName(): func<uint32_t>
+ // identifier : func<int32_t*>
+ //
+ // Try to match the basename with or without template parameters.
+ if (GetBasename() != identifier &&
+ GetBasenameNoTemplateParameters() != identifier)
+ return false;
+
+ // Incoming path only had an identifier, so we match.
+ if (context.empty())
+ return true;
+ // Incoming path has context but this method does not, no match.
+ if (m_context.empty())
+ return false;
+
+ llvm::StringRef haystack = m_context;
+ if (!haystack.consume_back(context))
+ return false;
+ if (haystack.empty() || !isalnum(haystack.back()))
+ return true;
+
+ return false;
+}
+
+bool CPlusPlusLanguage::IsCPPMangledName(llvm::StringRef name) {
+ // FIXME!! we should really run through all the known C++ Language plugins
+ // and ask each one if this is a C++ mangled name
+
+ Mangled::ManglingScheme scheme = Mangled::GetManglingScheme(name);
+
+ if (scheme == Mangled::eManglingSchemeNone)
+ return false;
+
+ return true;
+}
+
+bool CPlusPlusLanguage::ExtractContextAndIdentifier(
+ const char *name, llvm::StringRef &context, llvm::StringRef &identifier) {
+ if (MSVCUndecoratedNameParser::IsMSVCUndecoratedName(name))
+ return MSVCUndecoratedNameParser::ExtractContextAndIdentifier(name, context,
+ identifier);
+
+ CPlusPlusNameParser parser(name);
+ if (auto full_name = parser.ParseAsFullName()) {
+ identifier = full_name->basename;
+ context = full_name->context;
+ return true;
+ }
+ return false;
+}
More information about the lldb-commits
mailing list