[Lldb-commits] [lldb] ca0ce99 - [lldb] Print embedded nuls in char arrays (PR44649)

Pavel Labath via lldb-commits lldb-commits at lists.llvm.org
Thu Oct 14 00:52:55 PDT 2021


Author: Pavel Labath
Date: 2021-10-14T09:50:40+02:00
New Revision: ca0ce99fc87c9a49b4c4a69cc6e88594bf6eb13c

URL: https://github.com/llvm/llvm-project/commit/ca0ce99fc87c9a49b4c4a69cc6e88594bf6eb13c
DIFF: https://github.com/llvm/llvm-project/commit/ca0ce99fc87c9a49b4c4a69cc6e88594bf6eb13c.diff

LOG: [lldb] Print embedded nuls in char arrays (PR44649)

When we know the bounds of the array, print any embedded nuls instead of
treating them as terminators. An exception to this rule is made for the
nul character at the very end of the string. We don't print that, as
otherwise 99% of the strings would end in \0. This way the strings
usually come out the same as how the user typed it into the compiler
(char foo[] = "with\0nuls"). It also matches how they come out in gdb.

This resolves a FIXME left from D111399, and leaves another FIXME for dealing
with nul characters in "escape-non-printables=false" mode. In this mode the
characters cause the entire summary string to be terminated prematurely.

Differential Revision: https://reviews.llvm.org/D111634

Added: 
    

Modified: 
    lldb/source/Core/ValueObject.cpp
    lldb/test/API/functionalities/data-formatter/builtin-formats/TestBuiltinFormats.py
    lldb/test/API/functionalities/data-formatter/stringprinter/main.cpp
    lldb/test/Shell/SymbolFile/DWARF/x86/DW_AT_const_value.s

Removed: 
    


################################################################################
diff  --git a/lldb/source/Core/ValueObject.cpp b/lldb/source/Core/ValueObject.cpp
index 9c1ba99da1d05..6794d0c7331d3 100644
--- a/lldb/source/Core/ValueObject.cpp
+++ b/lldb/source/Core/ValueObject.cpp
@@ -849,8 +849,10 @@ bool ValueObject::SetData(DataExtractor &data, Status &error) {
 
 static bool CopyStringDataToBufferSP(const StreamString &source,
                                      lldb::DataBufferSP &destination) {
-  destination = std::make_shared<DataBufferHeap>(source.GetSize() + 1, 0);
-  memcpy(destination->GetBytes(), source.GetString().data(), source.GetSize());
+  llvm::StringRef src = source.GetString();
+  src.consume_back(llvm::StringRef("\0", 1));
+  destination = std::make_shared<DataBufferHeap>(src.size(), 0);
+  memcpy(destination->GetBytes(), src.data(), src.size());
   return true;
 }
 
@@ -912,8 +914,8 @@ ValueObject::ReadPointedString(lldb::DataBufferSP &buffer_sp, Status &error,
           CopyStringDataToBufferSP(s, buffer_sp);
           return {0, was_capped};
         }
-        buffer_sp = std::make_shared<DataBufferHeap>(cstr_len, 0);
-        memcpy(buffer_sp->GetBytes(), cstr, cstr_len);
+        s << llvm::StringRef(cstr, cstr_len);
+        CopyStringDataToBufferSP(s, buffer_sp);
         return {cstr_len, was_capped};
       } else {
         s << "<invalid address>";
@@ -1196,6 +1198,7 @@ bool ValueObject::DumpPrintableRepresentation(
         options.SetQuote('"');
         options.SetSourceSize(buffer_sp->GetByteSize());
         options.SetIsTruncated(read_string.second);
+        options.SetBinaryZeroIsTerminator(custom_format != eFormatVectorOfChar);
         formatters::StringPrinter::ReadBufferAndDumpToStream<
             lldb_private::formatters::StringPrinter::StringElementType::ASCII>(
             options);

diff  --git a/lldb/test/API/functionalities/data-formatter/builtin-formats/TestBuiltinFormats.py b/lldb/test/API/functionalities/data-formatter/builtin-formats/TestBuiltinFormats.py
index 741acd0431bd0..c894b80228cfe 100644
--- a/lldb/test/API/functionalities/data-formatter/builtin-formats/TestBuiltinFormats.py
+++ b/lldb/test/API/functionalities/data-formatter/builtin-formats/TestBuiltinFormats.py
@@ -90,8 +90,8 @@ def test(self):
 
         # Different character arrays.
         # FIXME: Passing a 'const char *' will ignore any given format,
-        self.assertIn('= " \\U0000001b\\a\\b\\f\\n\\r\\t\\vaA09"\n', self.getFormatted("character array", "cstring"))
-        self.assertIn('= " \\U0000001b\\a\\b\\f\\n\\r\\t\\vaA09"\n', self.getFormatted("c-string", "cstring"))
+        self.assertIn(r'= " \U0000001b\a\b\f\n\r\t\vaA09\0"', self.getFormatted("character array", "cstring"))
+        self.assertIn(r'= " \U0000001b\a\b\f\n\r\t\vaA09\0"', self.getFormatted("c-string", "cstring"))
         self.assertIn(' = " \\e\\a\\b\\f\\n\\r\\t\\vaA09" " \\U0000001b\\a\\b\\f\\n\\r\\t\\vaA09"\n',
                       self.getFormatted("c-string", "(char *)cstring"))
         self.assertIn('=\n', self.getFormatted("c-string", "(__UINT64_TYPE__)0"))
@@ -132,10 +132,10 @@ def test(self):
         self.assertIn('= 0x2007080c0a0d090b415a617a30391b00\n', self.getFormatted("OSType", string_expr))
 
         # bytes
-        self.assertIn('= " \\U0000001b\\a\\b\\f\\n\\r\\t\\vaA09"\n', self.getFormatted("bytes", "cstring"))
+        self.assertIn(r'= " \U0000001b\a\b\f\n\r\t\vaA09\0"', self.getFormatted("bytes", "cstring"))
 
         # bytes with ASCII
-        self.assertIn('= " \\U0000001b\\a\\b\\f\\n\\r\\t\\vaA09"\n', self.getFormatted("bytes with ASCII", "cstring"))
+        self.assertIn(r'= " \U0000001b\a\b\f\n\r\t\vaA09\0"', self.getFormatted("bytes with ASCII", "cstring"))
 
         # unicode16
         self.assertIn('= U+5678 U+1234\n', self.getFormatted("unicode16", "0x12345678"))

diff  --git a/lldb/test/API/functionalities/data-formatter/stringprinter/main.cpp b/lldb/test/API/functionalities/data-formatter/stringprinter/main.cpp
index ebc51d0c7aed1..cdd1b0ada5e08 100644
--- a/lldb/test/API/functionalities/data-formatter/stringprinter/main.cpp
+++ b/lldb/test/API/functionalities/data-formatter/stringprinter/main.cpp
@@ -8,11 +8,12 @@ struct A {
 
 int main (int argc, char const *argv[])
 {
-    A a, b;
+    A a, b, c;
     // Deliberately write past the end of data to test that the formatter stops
     // at the end of array.
     memcpy(a.data, "FOOBAR", 7);
     memcpy(b.data, "FO\0BAR", 7);
+    memcpy(c.data, "F\0O\0AR", 7);
     std::string stdstring("Hello\t\tWorld\nI am here\t\tto say hello\n"); //%self.addTearDownHook(lambda x: x.runCmd("setting set escape-non-printables true"))
     const char* constcharstar = stdstring.c_str();
     std::string longstring(
@@ -33,13 +34,15 @@ int main (int argc, char const *argv[])
     return 0;     //% if self.TraceOn(): self.runCmd('frame variable')
     //% self.expect_var_path('stdstring', summary='"Hello\\t\\tWorld\\nI am here\\t\\tto say hello\\n"')
     //% self.expect_var_path('constcharstar', summary='"Hello\\t\\tWorld\\nI am here\\t\\tto say hello\\n"')
+    //% self.expect_var_path("a.data", summary='"FOOB"')
+    //% self.expect_var_path("b.data", summary=r'"FO\0B"')
+    //% self.expect_var_path("c.data", summary=r'"F\0O"')
+    //%
     //% self.runCmd("setting set escape-non-printables false")
     //% self.expect_var_path('stdstring', summary='"Hello\t\tWorld\nI am here\t\tto say hello\n"')
     //% self.expect_var_path('constcharstar', summary='"Hello\t\tWorld\nI am here\t\tto say hello\n"')
     //% self.assertTrue(self.frame().FindVariable('longstring').GetSummary().endswith('"...'))
     //% self.assertTrue(self.frame().FindVariable('longconstcharstar').GetSummary().endswith('"...'))
-    //% self.expect_var_path("a.data", summary='"FOOB"')
-    // FIXME: Should this be "FO\0B" instead?
-    //% self.expect_var_path("b.data", summary='"FO"')
+    // FIXME: make "b.data" and "c.data" work sanely
 }
 

diff  --git a/lldb/test/Shell/SymbolFile/DWARF/x86/DW_AT_const_value.s b/lldb/test/Shell/SymbolFile/DWARF/x86/DW_AT_const_value.s
index a9421c5fa8548..5753be3f3ba25 100644
--- a/lldb/test/Shell/SymbolFile/DWARF/x86/DW_AT_const_value.s
+++ b/lldb/test/Shell/SymbolFile/DWARF/x86/DW_AT_const_value.s
@@ -17,7 +17,7 @@
 ## Variables specified using string forms. This behavior purely speculative -- I
 ## don't know of any compiler that would represent character strings this way.
 # CHECK: (char [7]) string = "string"
-# CHECK: (char [7]) strp = "strp"
+# CHECK: (char [7]) strp = "strp\0\0"
 ## Bogus attribute form. Let's make sure we don't crash at least.
 # CHECK: (char [7]) ref4 = <empty constant data>
 ## A variable of pointer type.


        


More information about the lldb-commits mailing list