[Lldb-commits] [lldb] [lldb][test] Split out libc++ std::string tests that check corrupted strings (PR #147252)
Michael Buch via lldb-commits
lldb-commits at lists.llvm.org
Mon Jul 7 03:02:34 PDT 2025
https://github.com/Michael137 updated https://github.com/llvm/llvm-project/pull/147252
>From 81f856c7c851d5b03ddd176977e7d0c55e7c7f6f Mon Sep 17 00:00:00 2001
From: Michael Buch <michaelbuch12 at gmail.com>
Date: Sun, 6 Jul 2025 09:47:39 +0100
Subject: [PATCH 1/2] [lldb][test] Split out libc++ std::string tests that check corrupted strings
---
.../libcxx/invalid-string/Makefile | 5 +
.../TestDataFormatterLibcxxString.py | 39 +++++++
.../libcxx/invalid-string/main.cpp | 110 ++++++++++++++++++
.../string/TestDataFormatterLibcxxString.py | 21 ----
.../data-formatter-stl/libcxx/string/main.cpp | 103 ----------------
5 files changed, 154 insertions(+), 124 deletions(-)
create mode 100644 lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/invalid-string/Makefile
create mode 100644 lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/invalid-string/TestDataFormatterLibcxxString.py
create mode 100644 lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/invalid-string/main.cpp
diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/invalid-string/Makefile b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/invalid-string/Makefile
new file mode 100644
index 0000000000000..c5df567e01a2a
--- /dev/null
+++ b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/invalid-string/Makefile
@@ -0,0 +1,5 @@
+CXX_SOURCES := main.cpp
+
+USE_LIBCPP := 1
+
+include Makefile.rules
diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/invalid-string/TestDataFormatterLibcxxString.py b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/invalid-string/TestDataFormatterLibcxxString.py
new file mode 100644
index 0000000000000..497a495106575
--- /dev/null
+++ b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/invalid-string/TestDataFormatterLibcxxString.py
@@ -0,0 +1,39 @@
+"""
+Test lldb behaves sanely when formatting corrupted `std::string`s.
+"""
+
+import lldb
+from lldbsuite.test.decorators import *
+from lldbsuite.test.lldbtest import *
+from lldbsuite.test import lldbutil
+
+
+class LibcxxInvalidStringDataFormatterTestCase(TestBase):
+ @add_test_categories(["libc++"])
+ @skipUnlessDarwin
+ @skipIf(archs=no_match(["arm"]))
+ def test(self):
+ self.build()
+
+ (target, process, thread, bkpt) = lldbutil.run_to_source_breakpoint(
+ self, "Set break point at this line.", lldb.SBFileSpec("main.cpp")
+ )
+ frame = thread.frames[0]
+
+ if not self.process().GetAddressByteSize() == 8:
+ self.skip()
+
+ # The test assumes that std::string is in its cap-size-data layout.
+ self.expect(
+ "frame variable garbage1", substrs=["garbage1 = Summary Unavailable"]
+ )
+ self.expect(
+ "frame variable garbage2", substrs=[r'garbage2 = "\xfa\xfa\xfa\xfa"']
+ )
+ self.expect("frame variable garbage3", substrs=[r'garbage3 = "\xf0\xf0"'])
+ self.expect(
+ "frame variable garbage4", substrs=["garbage4 = Summary Unavailable"]
+ )
+ self.expect(
+ "frame variable garbage5", substrs=["garbage5 = Summary Unavailable"]
+ )
diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/invalid-string/main.cpp b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/invalid-string/main.cpp
new file mode 100644
index 0000000000000..eb3efe1bcb7ef
--- /dev/null
+++ b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/invalid-string/main.cpp
@@ -0,0 +1,110 @@
+#include <cstdio>
+#include <cstdlib>
+#include <stdint.h>
+#include <string>
+
+// For more information about libc++'s std::string ABI, see:
+//
+// https://joellaity.com/2020/01/31/string.html
+
+// A corrupt string which hits the SSO code path, but has an invalid size.
+static struct {
+#if _LIBCPP_ABI_VERSION == 1
+ // Set the size of this short-mode string to 116. Note that in short mode,
+ // the size is encoded as `size << 1`.
+ unsigned char size = 232;
+
+ // 23 garbage bytes for the inline string payload.
+ char inline_buf[23] = {0};
+#else // _LIBCPP_ABI_VERSION == 1
+ // Like above, but data comes first, and use bitfields to indicate size.
+ char inline_buf[23] = {0};
+ unsigned char size : 7 = 116;
+ unsigned char is_long : 1 = 0;
+#endif // #if _LIBCPP_ABI_VERSION == 1
+} garbage_string_short_mode;
+
+// A corrupt libcxx string in long mode with a payload that contains a utf8
+// sequence that's inherently too long.
+static unsigned char garbage_utf8_payload1[] = {
+ 250, // This means that we expect a 5-byte sequence, this is invalid. LLDB
+ // should fall back to ASCII printing.
+ 250, 250, 250};
+static struct {
+#if _LIBCPP_ABI_VERSION == 1
+ uint64_t cap = 5;
+ uint64_t size = 4;
+ unsigned char *data = &garbage_utf8_payload1[0];
+#else // _LIBCPP_ABI_VERSION == 1
+ unsigned char *data = &garbage_utf8_payload1[0];
+ uint64_t size = 4;
+ uint64_t cap : 63 = 4;
+ uint64_t is_long : 1 = 1;
+#endif // #if _LIBCPP_ABI_VERSION == 1
+} garbage_string_long_mode1;
+
+// A corrupt libcxx string in long mode with a payload that contains a utf8
+// sequence that's too long to fit in the buffer.
+static unsigned char garbage_utf8_payload2[] = {
+ 240, // This means that we expect a 4-byte sequence, but the buffer is too
+ // small for this. LLDB should fall back to ASCII printing.
+ 240};
+static struct {
+#if _LIBCPP_ABI_VERSION == 1
+ uint64_t cap = 3;
+ uint64_t size = 2;
+ unsigned char *data = &garbage_utf8_payload2[0];
+#else // _LIBCPP_ABI_VERSION == 1
+ unsigned char *data = &garbage_utf8_payload2[0];
+ uint64_t size = 2;
+ uint64_t cap : 63 = 3;
+ uint64_t is_long : 1 = 1;
+#endif // #if _LIBCPP_ABI_VERSION == 1
+} garbage_string_long_mode2;
+
+// A corrupt libcxx string which has an invalid size (i.e. a size greater than
+// the capacity of the string).
+static struct {
+#if _LIBCPP_ABI_VERSION == 1
+ uint64_t cap = 5;
+ uint64_t size = 7;
+ const char *data = "foo";
+#else // _LIBCPP_ABI_VERSION == 1
+ const char *data = "foo";
+ uint64_t size = 7;
+ uint64_t cap : 63 = 5;
+ uint64_t is_long : 1 = 1;
+#endif // #if _LIBCPP_ABI_VERSION == 1
+} garbage_string_long_mode3;
+
+// A corrupt libcxx string in long mode with a payload that would trigger a
+// buffer overflow.
+static struct {
+#if _LIBCPP_ABI_VERSION == 1
+ uint64_t cap = 5;
+ uint64_t size = 2;
+ uint64_t data = 0xfffffffffffffffeULL;
+#else // _LIBCPP_ABI_VERSION == 1
+ uint64_t data = 0xfffffffffffffffeULL;
+ uint64_t size = 2;
+ uint64_t cap : 63 = 5;
+ uint64_t is_long : 1 = 1;
+#endif // #if _LIBCPP_ABI_VERSION == 1
+} garbage_string_long_mode4;
+
+int main() {
+ std::string garbage1, garbage2, garbage3, garbage4, garbage5;
+ if (sizeof(std::string) == sizeof(garbage_string_short_mode))
+ memcpy((void *)&garbage1, &garbage_string_short_mode, sizeof(std::string));
+ if (sizeof(std::string) == sizeof(garbage_string_long_mode1))
+ memcpy((void *)&garbage2, &garbage_string_long_mode1, sizeof(std::string));
+ if (sizeof(std::string) == sizeof(garbage_string_long_mode2))
+ memcpy((void *)&garbage3, &garbage_string_long_mode2, sizeof(std::string));
+ if (sizeof(std::string) == sizeof(garbage_string_long_mode3))
+ memcpy((void *)&garbage4, &garbage_string_long_mode3, sizeof(std::string));
+ if (sizeof(std::string) == sizeof(garbage_string_long_mode4))
+ memcpy((void *)&garbage5, &garbage_string_long_mode4, sizeof(std::string));
+
+ std::puts("// Set break point at this line.");
+ return 0;
+}
diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/string/TestDataFormatterLibcxxString.py b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/string/TestDataFormatterLibcxxString.py
index 32764629d65a7..2f7fc88e0f449 100644
--- a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/string/TestDataFormatterLibcxxString.py
+++ b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/string/TestDataFormatterLibcxxString.py
@@ -42,8 +42,6 @@ def cleanup():
self.runCmd("type filter clear", check=False)
self.runCmd("type synth clear", check=False)
- is_64_bit = self.process().GetAddressByteSize() == 8
-
# Execute the cleanup function during test case tear down.
self.addTearDownHook(cleanup)
@@ -126,25 +124,6 @@ def cleanup():
],
)
- # The test assumes that std::string is in its cap-size-data layout.
- is_alternate_layout = (
- "arm" in self.getArchitecture()
- ) and self.platformIsDarwin()
- if is_64_bit and not is_alternate_layout:
- self.expect(
- "frame variable garbage1", substrs=["garbage1 = Summary Unavailable"]
- )
- self.expect(
- "frame variable garbage2", substrs=[r'garbage2 = "\xfa\xfa\xfa\xfa"']
- )
- self.expect("frame variable garbage3", substrs=[r'garbage3 = "\xf0\xf0"'])
- self.expect(
- "frame variable garbage4", substrs=["garbage4 = Summary Unavailable"]
- )
- self.expect(
- "frame variable garbage5", substrs=["garbage5 = Summary Unavailable"]
- )
-
# Finally, make sure that if the string is not readable, we give an error:
bkpt_2 = target.BreakpointCreateBySourceRegex(
"Break here to look at bad string", self.main_spec
diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/string/main.cpp b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/string/main.cpp
index f9f1c0802e518..373f817a80f7d 100644
--- a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/string/main.cpp
+++ b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/string/main.cpp
@@ -1,97 +1,6 @@
#include <string>
#include <stdint.h>
-// For more information about libc++'s std::string ABI, see:
-//
-// https://joellaity.com/2020/01/31/string.html
-
-// A corrupt string which hits the SSO code path, but has an invalid size.
-static struct {
-#if _LIBCPP_ABI_VERSION == 1
- // Set the size of this short-mode string to 116. Note that in short mode,
- // the size is encoded as `size << 1`.
- unsigned char size = 232;
-
- // 23 garbage bytes for the inline string payload.
- char inline_buf[23] = {0};
-#else // _LIBCPP_ABI_VERSION == 1
- // Like above, but data comes first, and use bitfields to indicate size.
- char inline_buf[23] = {0};
- unsigned char size : 7 = 116;
- unsigned char is_long : 1 = 0;
-#endif // #if _LIBCPP_ABI_VERSION == 1
-} garbage_string_short_mode;
-
-// A corrupt libcxx string in long mode with a payload that contains a utf8
-// sequence that's inherently too long.
-static unsigned char garbage_utf8_payload1[] = {
- 250, // This means that we expect a 5-byte sequence, this is invalid. LLDB
- // should fall back to ASCII printing.
- 250, 250, 250
-};
-static struct {
-#if _LIBCPP_ABI_VERSION == 1
- uint64_t cap = 5;
- uint64_t size = 4;
- unsigned char *data = &garbage_utf8_payload1[0];
-#else // _LIBCPP_ABI_VERSION == 1
- unsigned char *data = &garbage_utf8_payload1[0];
- uint64_t size = 4;
- uint64_t cap : 63 = 4;
- uint64_t is_long : 1 = 1;
-#endif // #if _LIBCPP_ABI_VERSION == 1
-} garbage_string_long_mode1;
-
-// A corrupt libcxx string in long mode with a payload that contains a utf8
-// sequence that's too long to fit in the buffer.
-static unsigned char garbage_utf8_payload2[] = {
- 240, // This means that we expect a 4-byte sequence, but the buffer is too
- // small for this. LLDB should fall back to ASCII printing.
- 240
-};
-static struct {
-#if _LIBCPP_ABI_VERSION == 1
- uint64_t cap = 3;
- uint64_t size = 2;
- unsigned char *data = &garbage_utf8_payload2[0];
-#else // _LIBCPP_ABI_VERSION == 1
- unsigned char *data = &garbage_utf8_payload2[0];
- uint64_t size = 2;
- uint64_t cap : 63 = 3;
- uint64_t is_long : 1 = 1;
-#endif // #if _LIBCPP_ABI_VERSION == 1
-} garbage_string_long_mode2;
-
-// A corrupt libcxx string which has an invalid size (i.e. a size greater than
-// the capacity of the string).
-static struct {
-#if _LIBCPP_ABI_VERSION == 1
- uint64_t cap = 5;
- uint64_t size = 7;
- const char *data = "foo";
-#else // _LIBCPP_ABI_VERSION == 1
- const char *data = "foo";
- uint64_t size = 7;
- uint64_t cap : 63 = 5;
- uint64_t is_long : 1 = 1;
-#endif // #if _LIBCPP_ABI_VERSION == 1
-} garbage_string_long_mode3;
-
-// A corrupt libcxx string in long mode with a payload that would trigger a
-// buffer overflow.
-static struct {
-#if _LIBCPP_ABI_VERSION == 1
- uint64_t cap = 5;
- uint64_t size = 2;
- uint64_t data = 0xfffffffffffffffeULL;
-#else // _LIBCPP_ABI_VERSION == 1
- uint64_t data = 0xfffffffffffffffeULL;
- uint64_t size = 2;
- uint64_t cap : 63 = 5;
- uint64_t is_long : 1 = 1;
-#endif // #if _LIBCPP_ABI_VERSION == 1
-} garbage_string_long_mode4;
-
size_t touch_string(std::string &in_str)
{
return in_str.size(); // Break here to look at bad string
@@ -115,18 +24,6 @@ int main()
std::u32string u32_empty(U"");
std::string *null_str = nullptr;
- std::string garbage1, garbage2, garbage3, garbage4, garbage5;
- if (sizeof(std::string) == sizeof(garbage_string_short_mode))
- memcpy((void *)&garbage1, &garbage_string_short_mode, sizeof(std::string));
- if (sizeof(std::string) == sizeof(garbage_string_long_mode1))
- memcpy((void *)&garbage2, &garbage_string_long_mode1, sizeof(std::string));
- if (sizeof(std::string) == sizeof(garbage_string_long_mode2))
- memcpy((void *)&garbage3, &garbage_string_long_mode2, sizeof(std::string));
- if (sizeof(std::string) == sizeof(garbage_string_long_mode3))
- memcpy((void *)&garbage4, &garbage_string_long_mode3, sizeof(std::string));
- if (sizeof(std::string) == sizeof(garbage_string_long_mode4))
- memcpy((void *)&garbage5, &garbage_string_long_mode4, sizeof(std::string));
-
S.assign(L"!!!!!"); // Set break point at this line.
std::string *not_a_string = (std::string *) 0x0;
touch_string(*not_a_string);
>From 8b4bf2269531c1fd50849e5566af926a0020233f Mon Sep 17 00:00:00 2001
From: Michael Buch <michaelbuch12 at gmail.com>
Date: Mon, 7 Jul 2025 11:00:54 +0100
Subject: [PATCH 2/2] fixup! rename test file; skip on Darwin+arm
---
...ibcxxString.py => TestDataFormatterLibcxxInvalidString.py} | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
rename lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/invalid-string/{TestDataFormatterLibcxxString.py => TestDataFormatterLibcxxInvalidString.py} (92%)
diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/invalid-string/TestDataFormatterLibcxxString.py b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/invalid-string/TestDataFormatterLibcxxInvalidString.py
similarity index 92%
rename from lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/invalid-string/TestDataFormatterLibcxxString.py
rename to lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/invalid-string/TestDataFormatterLibcxxInvalidString.py
index 497a495106575..e681cde5d16bf 100644
--- a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/invalid-string/TestDataFormatterLibcxxString.py
+++ b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/invalid-string/TestDataFormatterLibcxxInvalidString.py
@@ -10,8 +10,8 @@
class LibcxxInvalidStringDataFormatterTestCase(TestBase):
@add_test_categories(["libc++"])
- @skipUnlessDarwin
- @skipIf(archs=no_match(["arm"]))
+ @skipIf(oslist=[lldbplatformutil.getDarwinOSTriples()],
+ archs=["arm", "aarch64"])
def test(self):
self.build()
More information about the lldb-commits mailing list