[Lldb-commits] [lldb] [lldb] Support the Python stable C API in PythonString::AsUTF8 (PR #152212)
Jonas Devlieghere via lldb-commits
lldb-commits at lists.llvm.org
Tue Aug 5 15:13:19 PDT 2025
https://github.com/JDevlieghere created https://github.com/llvm/llvm-project/pull/152212
This conditionally reimplements `PythonString::AsUTF8` using `PyUnicode_AsUTF8String` instead of `PyUnicode_AsUTF8AndSize`. The latter is slightly more efficient because it caches the result, which also means we can return an `llvm::StringRef` instead of having to allocate a `std::string`.
We pick the most efficient one depending on whether or not we're targeting the Python 3.10 stable API, however we now always return the result as a `std::string`.
>From f47d48db4be67ebeef822ad58a720aa45cfcd9c7 Mon Sep 17 00:00:00 2001
From: Jonas Devlieghere <jonas at devlieghere.com>
Date: Tue, 5 Aug 2025 14:49:30 -0700
Subject: [PATCH] [lldb] Support the Python stable C API in
PythonString::AsUTF8
This conditionally reimplements PythonString::AsUTF8 using
PyUnicode_AsUTF8String instead of PyUnicode_AsUTF8AndSize. The latter is
slightly more efficient because it caches the result, which also means
we can return a StringRef instead of having to allocate a std::string.
We pick the most efficient one depending on whether or not we're
targeting the Python 3.10 stable API, however we now always return the
result as a std::string.
---
lldb/bindings/python/python-wrapper.swig | 14 +++----
.../Python/PythonDataObjects.cpp | 38 ++++++++++++-------
.../Python/PythonDataObjects.h | 4 +-
.../Python/ScriptInterpreterPython.cpp | 8 ++--
4 files changed, 38 insertions(+), 26 deletions(-)
diff --git a/lldb/bindings/python/python-wrapper.swig b/lldb/bindings/python/python-wrapper.swig
index 2c30d536a753d..7f00ef43913f8 100644
--- a/lldb/bindings/python/python-wrapper.swig
+++ b/lldb/bindings/python/python-wrapper.swig
@@ -173,7 +173,7 @@ bool lldb_private::python::SWIGBridge::LLDBSwigPythonCallTypeScript(
else
result = pfunc(value_arg, dict, SWIGBridge::ToSWIGWrapper(*options_sp));
- retval = result.Str().GetString().str();
+ retval = result.Str().GetString();
return true;
}
@@ -604,7 +604,7 @@ lldb_private::python::SWIGBridge::LLDBSwigPythonGetRepeatCommandForScriptedComma
if (result.IsNone())
return std::nullopt;
- return result.Str().GetString().str();
+ return result.Str().GetString();
}
StructuredData::DictionarySP
@@ -807,7 +807,7 @@ bool lldb_private::python::SWIGBridge::LLDBSWIGPythonRunScriptKeywordProcess(
auto result = pfunc(SWIGBridge::ToSWIGWrapper(process), dict);
- output = result.Str().GetString().str();
+ output = result.Str().GetString();
return true;
}
@@ -831,7 +831,7 @@ std::optional<std::string> lldb_private::python::SWIGBridge::LLDBSWIGPythonRunSc
auto result = pfunc(SWIGBridge::ToSWIGWrapper(std::move(thread)), dict);
- return result.Str().GetString().str();
+ return result.Str().GetString();
}
bool lldb_private::python::SWIGBridge::LLDBSWIGPythonRunScriptKeywordTarget(
@@ -854,7 +854,7 @@ bool lldb_private::python::SWIGBridge::LLDBSWIGPythonRunScriptKeywordTarget(
auto result = pfunc(SWIGBridge::ToSWIGWrapper(target), dict);
- output = result.Str().GetString().str();
+ output = result.Str().GetString();
return true;
}
@@ -878,7 +878,7 @@ std::optional<std::string> lldb_private::python::SWIGBridge::LLDBSWIGPythonRunSc
auto result = pfunc(SWIGBridge::ToSWIGWrapper(std::move(frame)), dict);
- return result.Str().GetString().str();
+ return result.Str().GetString();
}
bool lldb_private::python::SWIGBridge::LLDBSWIGPythonRunScriptKeywordValue(
@@ -901,7 +901,7 @@ bool lldb_private::python::SWIGBridge::LLDBSWIGPythonRunScriptKeywordValue(
auto result = pfunc(SWIGBridge::ToSWIGWrapper(value), dict);
- output = result.Str().GetString().str();
+ output = result.Str().GetString();
return true;
}
diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.cpp b/lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.cpp
index 4a45673a6ebbc..7845e56c8c05c 100644
--- a/lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.cpp
+++ b/lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.cpp
@@ -405,28 +405,40 @@ bool PythonString::Check(PyObject *py_obj) {
return false;
}
-llvm::StringRef PythonString::GetString() const {
+std::string PythonString::GetString() const {
auto s = AsUTF8();
if (!s) {
llvm::consumeError(s.takeError());
- return llvm::StringRef("");
+ return "";
}
return s.get();
}
-Expected<llvm::StringRef> PythonString::AsUTF8() const {
+Expected<std::string> PythonString::AsUTF8() const {
if (!IsValid())
return nullDeref();
- Py_ssize_t size;
- const char *data;
+ // PyUnicode_AsUTF8AndSize caches the UTF-8 representation of the string in
+ // the Unicode object, making it more efficient than PyUnicode_AsUTF8String.
+#if defined(Py_LIMITED_API) && (Py_LIMITED_API < 0x030a0000)
+ PyObject *py_bytes = PyUnicode_AsUTF8String(m_py_obj);
+ if (!py_bytes)
+ return exception();
- data = PyUnicode_AsUTF8AndSize(m_py_obj, &size);
+ auto release_py_str =
+ llvm::make_scope_exit([py_bytes] { Py_DECREF(py_bytes); });
- if (!data)
+ Py_ssize_t size = PyBytes_Size(py_bytes);
+ const char *str = PyBytes_AsString(py_bytes);
+#else
+ Py_ssize_t size;
+ const char *str = PyUnicode_AsUTF8AndSize(m_py_obj, &size);
+#endif
+
+ if (!str)
return exception();
- return llvm::StringRef(data, size);
+ return std::string(str, size);
}
size_t PythonString::GetSize() const {
@@ -1248,12 +1260,12 @@ class TextPythonFile : public PythonIOFile {
// EOF
return Status();
}
- auto stringref = pystring.get().AsUTF8();
- if (!stringref)
+ auto str = pystring.get().AsUTF8();
+ if (!str)
// Cloning since the wrapped exception may still reference the PyThread.
- return Status::FromError(stringref.takeError()).Clone();
- num_bytes = stringref.get().size();
- memcpy(buf, stringref.get().begin(), num_bytes);
+ return Status::FromError(str.takeError()).Clone();
+ num_bytes = str->size();
+ memcpy(buf, str->c_str(), num_bytes);
return Status();
}
};
diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.h b/lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.h
index 45bb499248329..9586eedd03176 100644
--- a/lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.h
+++ b/lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.h
@@ -460,9 +460,9 @@ class PythonString : public TypedPythonObject<PythonString> {
static bool Check(PyObject *py_obj);
- llvm::StringRef GetString() const; // safe, empty string on error
+ std::string GetString() const; // safe, empty string on error
- llvm::Expected<llvm::StringRef> AsUTF8() const;
+ llvm::Expected<std::string> AsUTF8() const;
size_t GetSize() const;
diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp
index 24d604f22a765..b98ff035e5798 100644
--- a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp
+++ b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp
@@ -2715,8 +2715,8 @@ bool ScriptInterpreterPythonImpl::GetShortHelpForCommandObject(
if (py_return.IsAllocated() && PythonString::Check(py_return.get())) {
PythonString py_string(PyRefType::Borrowed, py_return.get());
- llvm::StringRef return_data(py_string.GetString());
- dest.assign(return_data.data(), return_data.size());
+ std::string return_data = py_string.GetString();
+ dest.assign(return_data.c_str(), return_data.size());
return true;
}
@@ -2996,8 +2996,8 @@ bool ScriptInterpreterPythonImpl::GetLongHelpForCommandObject(
bool got_string = false;
if (py_return.IsAllocated() && PythonString::Check(py_return.get())) {
PythonString str(PyRefType::Borrowed, py_return.get());
- llvm::StringRef str_data(str.GetString());
- dest.assign(str_data.data(), str_data.size());
+ std::string str_data = str.GetString();
+ dest.assign(str_data.c_str(), str_data.size());
got_string = true;
}
More information about the lldb-commits
mailing list