[Lldb-commits] [lldb] Summarize std::string's when created from data. (PR #89110)

Jacob Lalonde via lldb-commits lldb-commits at lists.llvm.org
Wed Apr 24 13:53:45 PDT 2024


https://github.com/Jlalond updated https://github.com/llvm/llvm-project/pull/89110

>From e0316188d22605c670079e37855d3d8b5c944cee Mon Sep 17 00:00:00 2001
From: Jacob John Lalonde <jalalonde at fb.com>
Date: Wed, 10 Apr 2024 14:33:40 -0700
Subject: [PATCH 1/3] Fix bug where an sbvalue containing a std::string created
 from data would not use the built in summary provider, but default to the
 valueobjectprinter

std::string children are normally formatted as their summary [my_string_prop] = "hello world".
Sbvalues created from data do not follow the same summary formatter path and instead hit the value object printer.
This change fixes that and adds tests for the future.

Added tests in TestDataFormatterStdString.py and a formatter for a custom struct.
---
 .../Plugins/Language/CPlusPlus/LibStdcpp.cpp  | 31 ++++++++++--
 .../string/ConvertToDataFormatter.py          | 50 +++++++++++++++++++
 .../string/TestDataFormatterStdString.py      | 28 +++++++++++
 .../libstdcpp/string/main.cpp                 | 13 +++++
 4 files changed, 118 insertions(+), 4 deletions(-)
 create mode 100644 lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/string/ConvertToDataFormatter.py

diff --git a/lldb/source/Plugins/Language/CPlusPlus/LibStdcpp.cpp b/lldb/source/Plugins/Language/CPlusPlus/LibStdcpp.cpp
index 86bb575af5ca34..0da01e9ca07fb6 100644
--- a/lldb/source/Plugins/Language/CPlusPlus/LibStdcpp.cpp
+++ b/lldb/source/Plugins/Language/CPlusPlus/LibStdcpp.cpp
@@ -254,13 +254,13 @@ bool lldb_private::formatters::LibStdcppStringSummaryProvider(
   } else
     addr_of_string =
         valobj.GetAddressOf(scalar_is_load_addr, &addr_type);
-  if (addr_of_string != LLDB_INVALID_ADDRESS) {
+  if (addr_of_string != LLDB_INVALID_ADDRESS ||
+        addr_type == eAddressTypeHost) {
     switch (addr_type) {
     case eAddressTypeLoad: {
       ProcessSP process_sp(valobj.GetProcessSP());
       if (!process_sp)
         return false;
-
       StringPrinter::ReadStringAndDumpToStreamOptions options(valobj);
       Status error;
       lldb::addr_t addr_of_data =
@@ -287,8 +287,31 @@ bool lldb_private::formatters::LibStdcppStringSummaryProvider(
       } else
         return true;
     } break;
-    case eAddressTypeHost:
-      break;
+    case eAddressTypeHost: {
+      // We have the host address of our std::string
+      // But we need to read the pointee data from the debugged process.
+      ProcessSP process_sp(valobj.GetProcessSP());
+      DataExtractor data;
+      Status error;
+      valobj.GetData(data, error);
+      if (error.Fail())
+        return false;
+      // We want to read the address from std::string, which is the first 8 bytes.
+      lldb::offset_t offset = 0;
+      lldb::addr_t addr = data.GetAddress(&offset);
+      if (!addr)
+      {
+        stream.Printf("nullptr");
+        return true;
+      }
+
+      std::string contents;
+      process_sp->ReadCStringFromMemory(addr, contents, error);
+      if (error.Fail())
+        return false;
+      stream.Printf("%s", contents.c_str());
+      return true;
+    } break;
     case eAddressTypeInvalid:
     case eAddressTypeFile:
       break;
diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/string/ConvertToDataFormatter.py b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/string/ConvertToDataFormatter.py
new file mode 100644
index 00000000000000..57e42c920f8294
--- /dev/null
+++ b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/string/ConvertToDataFormatter.py
@@ -0,0 +1,50 @@
+# coding=utf8
+"""
+Helper formmater to verify Std::String by created via SBData
+"""
+
+import lldb
+
+class SyntheticFormatter:
+    def __init__(self, valobj, dict):
+        self.valobj = valobj
+
+    def num_children(self):
+        return 6
+
+    def has_children(self):
+        return True
+
+    def get_child_at_index(self, index):
+        name = None
+        match index:
+            case 0:
+                name = "short_string"
+            case 1:
+                name = "long_string"
+            case 2:
+                name = "short_string_ptr"
+            case 3:
+                name = "long_string_ptr"
+            case 4:
+                name = "short_string_ref"
+            case 5:
+                name = "long_string_ref"
+            case _:
+                return None
+
+        child = self.valobj.GetChildMemberWithName(name)
+        valType = child.GetType()
+        return self.valobj.CreateValueFromData(name,
+                child.GetData(),
+                valType)
+
+
+def __lldb_init_module(debugger, dict):
+    typeName = "string_container"
+    debugger.HandleCommand(
+        'type synthetic add -x "'
+        + typeName
+        + '" --python-class '
+        + f"{__name__}.SyntheticFormatter"
+    )
diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/string/TestDataFormatterStdString.py b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/string/TestDataFormatterStdString.py
index 0b78fb7dc3911c..2adfe47f9a3cfd 100644
--- a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/string/TestDataFormatterStdString.py
+++ b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/string/TestDataFormatterStdString.py
@@ -37,6 +37,8 @@ def test_with_run_command(self):
             substrs=["stopped", "stop reason = breakpoint"],
         )
 
+        self.runCmd("command script import ./ConvertToDataFormatter.py", check=True)
+
         # This is the function to remove the custom formats in order to have a
         # clean slate for the next test case.
         def cleanup():
@@ -60,6 +62,7 @@ def cleanup():
         var_rQ = self.frame().FindVariable("rQ")
         var_pq = self.frame().FindVariable("pq")
         var_pQ = self.frame().FindVariable("pQ")
+        var_str_container = self.frame().FindVariable("sc")
 
         self.assertEqual(var_wempty.GetSummary(), 'L""', "wempty summary wrong")
         self.assertEqual(
@@ -89,6 +92,31 @@ def cleanup():
             "pQ summary wrong",
         )
 
+        self.assertEqual(
+            var_str_container.GetChildAtIndex(0).GetSummary(),
+            '"u22"',
+            "string container child wrong")
+        self.assertEqual(
+            var_str_container.GetChildAtIndex(1).GetSummary(),
+            '"quite a long std::string with lots of info inside it inside a struct"',
+            "string container child wrong")
+        self.assertEqual(
+            var_str_container.GetChildAtIndex(2).GetSummary(),
+            '"u22"',
+            "string container child wrong")
+        self.assertEqual(
+            var_str_container.GetChildAtIndex(3).GetSummary(),
+            '"quite a long std::string with lots of info inside it inside a struct"',
+            "string container child wrong")
+        self.assertEqual(
+            var_str_container.GetChildAtIndex(4).GetSummary(),
+            '"u22"',
+            "string container child wrong")
+        self.assertEqual(
+            var_str_container.GetChildAtIndex(5).GetSummary(),
+            '"quite a long std::string with lots of info inside it inside a struct"',
+            "string container child wrong")
+
         self.runCmd("next")
 
         self.assertEqual(var_S.GetSummary(), 'L"!!!!!"', "new S summary wrong")
diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/string/main.cpp b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/string/main.cpp
index eefb96c4573e4e..b169a2e6945568 100644
--- a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/string/main.cpp
+++ b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/string/main.cpp
@@ -1,5 +1,14 @@
 #include <string>
 
+struct string_container {
+    std::string short_string;
+    std::string long_string;
+    std::string *short_string_ptr = &short_string;
+    std::string *long_string_ptr = &long_string;
+    std::string &short_string_ref = short_string;
+    std::string &long_string_ref = long_string;
+};
+
 int main()
 {
     std::wstring wempty(L"");
@@ -11,6 +20,10 @@ int main()
     std::string Q("quite a long std::strin with lots of info inside it");
     auto &rq = q, &rQ = Q;
     std::string *pq = &q, *pQ = &Q;
+    string_container sc;
+    sc.short_string = "u22";
+    sc.long_string = "quite a long std::string with lots of info inside it inside a struct";
+
     S.assign(L"!!!!!"); // Set break point at this line.
     return 0;
 }

>From 34b26c16144f3870e1d1eb0083d6e603bf0c1213 Mon Sep 17 00:00:00 2001
From: Jacob John Lalonde <jalalonde at fb.com>
Date: Wed, 17 Apr 2024 14:54:12 -0700
Subject: [PATCH 2/3] Add requested comment | Support when the value object's
 children are also in memory

---
 .../Plugins/Language/CPlusPlus/LibStdcpp.cpp  | 51 ++++++++++++++-----
 1 file changed, 38 insertions(+), 13 deletions(-)

diff --git a/lldb/source/Plugins/Language/CPlusPlus/LibStdcpp.cpp b/lldb/source/Plugins/Language/CPlusPlus/LibStdcpp.cpp
index 0da01e9ca07fb6..47bf6eb646828d 100644
--- a/lldb/source/Plugins/Language/CPlusPlus/LibStdcpp.cpp
+++ b/lldb/source/Plugins/Language/CPlusPlus/LibStdcpp.cpp
@@ -254,6 +254,10 @@ bool lldb_private::formatters::LibStdcppStringSummaryProvider(
   } else
     addr_of_string =
         valobj.GetAddressOf(scalar_is_load_addr, &addr_type);
+
+  // We have to check for host address here
+  // because GetAddressOf returns INVALID for all non load addresses.
+  // But we can still format strings in host memory.
   if (addr_of_string != LLDB_INVALID_ADDRESS ||
         addr_type == eAddressTypeHost) {
     switch (addr_type) {
@@ -288,29 +292,50 @@ bool lldb_private::formatters::LibStdcppStringSummaryProvider(
         return true;
     } break;
     case eAddressTypeHost: {
-      // We have the host address of our std::string
-      // But we need to read the pointee data from the debugged process.
-      ProcessSP process_sp(valobj.GetProcessSP());
+
       DataExtractor data;
       Status error;
       valobj.GetData(data, error);
       if (error.Fail())
         return false;
-      // We want to read the address from std::string, which is the first 8 bytes.
+
       lldb::offset_t offset = 0;
-      lldb::addr_t addr = data.GetAddress(&offset);
-      if (!addr)
+      AddressType child_addressType = valobj.GetAddressTypeOfChildren();
+      if (child_addressType == eAddressTypeLoad)
       {
-        stream.Printf("nullptr");
+        // We have the host address of our std::string
+        // But we need to read the pointee data from the debugged process.
+        ProcessSP process_sp(valobj.GetProcessSP());
+        // We want to read the address from std::string, which is the first 8 bytes.
+        lldb::addr_t addr = data.GetAddress(&offset);
+        if (!addr)
+        {
+          stream.Printf("nullptr");
+          return true;
+        }
+        std::string contents;
+        process_sp->ReadCStringFromMemory(addr, contents, error);
+        if (error.Fail())
+          return false;
+
+        stream.Printf("%s", contents.c_str());
         return true;
       }
 
-      std::string contents;
-      process_sp->ReadCStringFromMemory(addr, contents, error);
-      if (error.Fail())
-        return false;
-      stream.Printf("%s", contents.c_str());
-      return true;
+      if (child_addressType == eAddressTypeHost)
+      {
+        lldb::offset_t size = data.GetByteSize();
+        const void* dataStart = data.GetData(&offset, size);
+        if (!dataStart)
+          return false;
+
+        const std::string* str = static_cast<const std::string*>(dataStart);
+        stream.Printf("%s", str->c_str());
+        return true;
+      }
+
+      // Fall through if the child address type is file or invalid.
+      return false;
     } break;
     case eAddressTypeInvalid:
     case eAddressTypeFile:

>From abc756dee88b24712f3c6658637fd334af281889 Mon Sep 17 00:00:00 2001
From: Jacob John Lalonde <jalalonde at fb.com>
Date: Wed, 24 Apr 2024 13:52:10 -0700
Subject: [PATCH 3/3] Split data out into it's own test case, and added minor
 refactoring to the test class to reduce code duplication

---
 .../string/TestDataFormatterStdString.py      | 57 +++++++++++++------
 1 file changed, 41 insertions(+), 16 deletions(-)

diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/string/TestDataFormatterStdString.py b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/string/TestDataFormatterStdString.py
index 2adfe47f9a3cfd..8dbd4f4ae0b85a 100644
--- a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/string/TestDataFormatterStdString.py
+++ b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libstdcpp/string/TestDataFormatterStdString.py
@@ -17,6 +17,15 @@ def setUp(self):
         # Find the line number to break at.
         self.line = line_number("main.cpp", "// Set break point at this line.")
 
+    # This is the function to remove the custom formats in order to have a
+    # clean slate for the next test case.
+    def cleanup(self):
+        self.runCmd("type format clear", check=False)
+        self.runCmd("type summary clear", check=False)
+        self.runCmd("type filter clear", check=False)
+        self.runCmd("type synth clear", check=False)
+        self.runCmd("settings set target.max-children-count 256", check=False)
+
     @add_test_categories(["libstdcxx"])
     @expectedFailureAll(bugnumber="llvm.org/pr50861", compiler="gcc")
     def test_with_run_command(self):
@@ -37,19 +46,9 @@ def test_with_run_command(self):
             substrs=["stopped", "stop reason = breakpoint"],
         )
 
-        self.runCmd("command script import ./ConvertToDataFormatter.py", check=True)
-
-        # This is the function to remove the custom formats in order to have a
-        # clean slate for the next test case.
-        def cleanup():
-            self.runCmd("type format clear", check=False)
-            self.runCmd("type summary clear", check=False)
-            self.runCmd("type filter clear", check=False)
-            self.runCmd("type synth clear", check=False)
-            self.runCmd("settings set target.max-children-count 256", check=False)
 
         # Execute the cleanup function during test case tear down.
-        self.addTearDownHook(cleanup)
+        self.addTearDownHook(self.cleanup())
 
         var_wempty = self.frame().FindVariable("wempty")
         var_s = self.frame().FindVariable("s")
@@ -62,7 +61,6 @@ def cleanup():
         var_rQ = self.frame().FindVariable("rQ")
         var_pq = self.frame().FindVariable("pq")
         var_pQ = self.frame().FindVariable("pQ")
-        var_str_container = self.frame().FindVariable("sc")
 
         self.assertEqual(var_wempty.GetSummary(), 'L""', "wempty summary wrong")
         self.assertEqual(
@@ -92,6 +90,37 @@ def cleanup():
             "pQ summary wrong",
         )
 
+        self.runCmd("next")
+
+        self.assertEqual(var_S.GetSummary(), 'L"!!!!!"', "new S summary wrong")
+
+
+    @add_test_categories(["libstdcxx"])
+    @expectedFailureAll(bugnumber="llvm.org/pr50861", compiler="gcc")
+    def test_std_string_as_data(self):
+        """Test that strings created via SBValue::CreateFromData display correctly."""
+        self.build()
+        self.runCmd("file " + self.getBuildArtifact("a.out"), CURRENT_EXECUTABLE_SET)
+
+        lldbutil.run_break_set_by_file_and_line(
+            self, "main.cpp", self.line, num_expected_locations=-1
+        )
+
+        self.runCmd("run", RUN_SUCCEEDED)
+
+        self.runCmd("command script import ./ConvertToDataFormatter.py", check=True)
+
+        self.expect(
+            "thread list",
+            STOPPED_DUE_TO_BREAKPOINT,
+            substrs=["stopped", "stop reason = breakpoint"],
+        )
+
+        # Execute the cleanup function during test case tear down.
+        self.addTearDownHook(self.cleanup())
+
+        var_str_container = self.frame().FindVariable("sc")
+
         self.assertEqual(
             var_str_container.GetChildAtIndex(0).GetSummary(),
             '"u22"',
@@ -116,7 +145,3 @@ def cleanup():
             var_str_container.GetChildAtIndex(5).GetSummary(),
             '"quite a long std::string with lots of info inside it inside a struct"',
             "string container child wrong")
-
-        self.runCmd("next")
-
-        self.assertEqual(var_S.GetSummary(), 'L"!!!!!"', "new S summary wrong")



More information about the lldb-commits mailing list