[Lldb-commits] [lldb] 7aabad1 - [lldb/StringPrinter] Avoid reading garbage in uninitialized strings

Vedant Kumar via lldb-commits lldb-commits at lists.llvm.org
Wed Feb 12 11:24:15 PST 2020


Author: Vedant Kumar
Date: 2020-02-12T11:24:03-08:00
New Revision: 7aabad131288a598c90903ae7d96383105ae3fc2

URL: https://github.com/llvm/llvm-project/commit/7aabad131288a598c90903ae7d96383105ae3fc2
DIFF: https://github.com/llvm/llvm-project/commit/7aabad131288a598c90903ae7d96383105ae3fc2.diff

LOG: [lldb/StringPrinter] Avoid reading garbage in uninitialized strings

This patch fixes a few related out-of-bounds read bugs in the
string data formatters. These issues have to do with mishandling of un-
initialized strings. These manifest as ASan exceptions when debugging a
clang binary.

The first issue was that the std::string formatter treated strings in
"short mode" with length greater than the size of the inline buffer as
valid.

The second issue was that the StringPrinter facility did not check that
a full utf8 codepoint sequence can be read from the buffer (i.e. there
are some missing range checks). I took the opportunity here to delete
some untested code that was meant to deal with invalid input and replace
it with fail-on-invalid logic ([1][2][3]). This means we'll give up on
formatting an invalid string instead of guessing our way through it.

The third issue is that StringPrinter did not check that a utf8 sequence
could actually be fully read from the string payload. This one is especially
tricky as we may overflow the buffer pointer while reading the sequence.

I also noticed that the std::string formatter would spew the raw version of
the underlying ValueObject when garbage is detected. I've changed this to
just print "Summary Unavailable" instead, as we do elsewhere.

I've added regression tests for these issues to
test/functionalities/data-formatter/data-formatter-stl/libcxx/string.

[1]
http://lab.llvm.org:8080/coverage/coverage-reports/coverage/Users/buildslave/jenkins/workspace/coverage/llvm-project/lldb/source/DataFormatters/StringPrinter.cpp.html#L136
[2]
http://lab.llvm.org:8080/coverage/coverage-reports/coverage/Users/buildslave/jenkins/workspace/coverage/llvm-project/lldb/source/DataFormatters/StringPrinter.cpp.html#L163
[3]
http://lab.llvm.org:8080/coverage/coverage-reports/coverage/Users/buildslave/jenkins/workspace/coverage/llvm-project/lldb/source/DataFormatters/StringPrinter.cpp.html#L357

rdar://59080026

Differential Revision: https://reviews.llvm.org/D73860

Added: 
    

Modified: 
    lldb/source/DataFormatters/StringPrinter.cpp
    lldb/source/Plugins/Language/CPlusPlus/LibCxx.cpp
    lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/string/TestDataFormatterLibcxxString.py
    lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/string/main.cpp

Removed: 
    


################################################################################
diff  --git a/lldb/source/DataFormatters/StringPrinter.cpp b/lldb/source/DataFormatters/StringPrinter.cpp
index 1e05c7106c60..92dd71d17b8c 100644
--- a/lldb/source/DataFormatters/StringPrinter.cpp
+++ b/lldb/source/DataFormatters/StringPrinter.cpp
@@ -131,14 +131,15 @@ GetPrintableImpl<StringPrinter::StringElementType::UTF8>(uint8_t *buffer,
                                                          uint8_t *&next) {
   StringPrinter::StringPrinterBufferPointer retval{nullptr};
 
-  unsigned utf8_encoded_len = llvm::getNumBytesForUTF8(*buffer);
+  const unsigned utf8_encoded_len = llvm::getNumBytesForUTF8(*buffer);
 
-  if (1u + std::distance(buffer, buffer_end) < utf8_encoded_len) {
-    // I don't have enough bytes - print whatever I have left
-    retval = {buffer, static_cast<size_t>(1 + buffer_end - buffer)};
-    next = buffer_end + 1;
+  // If the utf8 encoded length is invalid, or if there aren't enough bytes to
+  // print, this is some kind of corrupted string.
+  if (utf8_encoded_len == 0 || utf8_encoded_len > 4)
+    return retval;
+  if ((buffer_end - buffer) < utf8_encoded_len)
+    // There's no room in the buffer for the utf8 sequence.
     return retval;
-  }
 
   char32_t codepoint = 0;
   switch (utf8_encoded_len) {
@@ -160,12 +161,6 @@ GetPrintableImpl<StringPrinter::StringElementType::UTF8>(uint8_t *buffer,
         (unsigned char)*buffer, (unsigned char)*(buffer + 1),
         (unsigned char)*(buffer + 2), (unsigned char)*(buffer + 3));
     break;
-  default:
-    // this is probably some bogus non-character thing just print it as-is and
-    // hope to sync up again soon
-    retval = {buffer, 1};
-    next = buffer + 1;
-    return retval;
   }
 
   if (codepoint) {
@@ -215,9 +210,7 @@ GetPrintableImpl<StringPrinter::StringElementType::UTF8>(uint8_t *buffer,
     return retval;
   }
 
-  // this should not happen - but just in case.. try to resync at some point
-  retval = {buffer, 1};
-  next = buffer + 1;
+  // We couldn't figure out how to print this string.
   return retval;
 }
 
@@ -227,7 +220,7 @@ GetPrintableImpl<StringPrinter::StringElementType::UTF8>(uint8_t *buffer,
 static StringPrinter::StringPrinterBufferPointer
 GetPrintable(StringPrinter::StringElementType type, uint8_t *buffer,
              uint8_t *buffer_end, uint8_t *&next) {
-  if (!buffer)
+  if (!buffer || buffer >= buffer_end)
     return {nullptr};
 
   switch (type) {
@@ -354,13 +347,11 @@ static bool DumpUTFBufferToStream(
             escaping_callback(utf8_data_ptr, utf8_data_end_ptr, next_data);
         auto printable_bytes = printable.GetBytes();
         auto printable_size = printable.GetSize();
-        if (!printable_bytes || !next_data) {
-          // GetPrintable() failed on us - print one byte in a desperate resync
-          // attempt
-          printable_bytes = utf8_data_ptr;
-          printable_size = 1;
-          next_data = utf8_data_ptr + 1;
-        }
+
+        // We failed to figure out how to print this string.
+        if (!printable_bytes || !next_data)
+          return false;
+
         for (unsigned c = 0; c < printable_size; c++)
           stream.Printf("%c", *(printable_bytes + c));
         utf8_data_ptr = (uint8_t *)next_data;
@@ -478,13 +469,11 @@ bool StringPrinter::ReadStringAndDumpToStream<
       auto printable = escaping_callback(data, data_end, next_data);
       auto printable_bytes = printable.GetBytes();
       auto printable_size = printable.GetSize();
-      if (!printable_bytes || !next_data) {
-        // GetPrintable() failed on us - print one byte in a desperate resync
-        // attempt
-        printable_bytes = data;
-        printable_size = 1;
-        next_data = data + 1;
-      }
+
+      // We failed to figure out how to print this string.
+      if (!printable_bytes || !next_data)
+        return false;
+
       for (unsigned c = 0; c < printable_size; c++)
         options.GetStream()->Printf("%c", *(printable_bytes + c));
       data = (uint8_t *)next_data;

diff  --git a/lldb/source/Plugins/Language/CPlusPlus/LibCxx.cpp b/lldb/source/Plugins/Language/CPlusPlus/LibCxx.cpp
index b2b68b25eeee..e9e4db97ebbe 100644
--- a/lldb/source/Plugins/Language/CPlusPlus/LibCxx.cpp
+++ b/lldb/source/Plugins/Language/CPlusPlus/LibCxx.cpp
@@ -8,7 +8,6 @@
 
 #include "LibCxx.h"
 
-#include "llvm/ADT/ScopeExit.h"
 #include "lldb/Core/Debugger.h"
 #include "lldb/Core/FormatEntity.h"
 #include "lldb/Core/ValueObject.h"
@@ -527,6 +526,17 @@ static bool ExtractLibcxxStringInfo(ValueObject &valobj,
     size = (layout == eLibcxxStringLayoutModeDSC)
                ? size_mode_value
                : ((size_mode_value >> 1) % 256);
+
+    // When the small-string optimization takes place, the data must fit in the
+    // inline string buffer (23 bytes on x86_64/Darwin). If it doesn't, it's
+    // likely that the string isn't initialized and we're reading garbage.
+    ExecutionContext exe_ctx(location_sp->GetExecutionContextRef());
+    const llvm::Optional<uint64_t> max_bytes =
+        location_sp->GetCompilerType().GetByteSize(
+            exe_ctx.GetBestExecutionContextScope());
+    if (!max_bytes || size > *max_bytes)
+      return false;
+
     return (location_sp.get() != nullptr);
   } else {
     ValueObjectSP l(D->GetChildAtIndex(0, true));
@@ -537,9 +547,15 @@ static bool ExtractLibcxxStringInfo(ValueObject &valobj,
                       ? layout_decider
                       : l->GetChildAtIndex(2, true);
     ValueObjectSP size_vo(l->GetChildAtIndex(1, true));
-    if (!size_vo || !location_sp)
+    const unsigned capacity_index =
+        (layout == eLibcxxStringLayoutModeDSC) ? 2 : 0;
+    ValueObjectSP capacity_vo(l->GetChildAtIndex(capacity_index, true));
+    if (!size_vo || !location_sp || !capacity_vo)
+      return false;
+    size = size_vo->GetValueAsUnsigned(LLDB_INVALID_OFFSET);
+    const uint64_t cap = capacity_vo->GetValueAsUnsigned(LLDB_INVALID_OFFSET);
+    if (size == LLDB_INVALID_OFFSET || cap == LLDB_INVALID_OFFSET || cap < size)
       return false;
-    size = size_vo->GetValueAsUnsigned(0);
     return true;
   }
 }
@@ -569,7 +585,9 @@ bool lldb_private::formatters::LibcxxWStringSummaryProvider(
       options.SetIsTruncated(true);
     }
   }
-  location_sp->GetPointeeData(extractor, 0, size);
+  const size_t bytes_read = location_sp->GetPointeeData(extractor, 0, size);
+  if (bytes_read < size)
+    return false;
 
   // std::wstring::size() is measured in 'characters', not bytes
   TypeSystemClang *ast_context =
@@ -591,41 +609,33 @@ bool lldb_private::formatters::LibcxxWStringSummaryProvider(
 
   switch (*wchar_t_size) {
   case 1:
-    StringPrinter::ReadBufferAndDumpToStream<
+    return StringPrinter::ReadBufferAndDumpToStream<
         lldb_private::formatters::StringPrinter::StringElementType::UTF8>(
         options);
     break;
 
   case 2:
-    lldb_private::formatters::StringPrinter::ReadBufferAndDumpToStream<
+    return StringPrinter::ReadBufferAndDumpToStream<
         lldb_private::formatters::StringPrinter::StringElementType::UTF16>(
         options);
     break;
 
   case 4:
-    lldb_private::formatters::StringPrinter::ReadBufferAndDumpToStream<
+    return StringPrinter::ReadBufferAndDumpToStream<
         lldb_private::formatters::StringPrinter::StringElementType::UTF32>(
         options);
-    break;
-
-  default:
-    stream.Printf("size for wchar_t is not valid");
-    return true;
   }
-
-  return true;
+  return false;
 }
 
 template <StringPrinter::StringElementType element_type>
 bool LibcxxStringSummaryProvider(ValueObject &valobj, Stream &stream,
                                  const TypeSummaryOptions &summary_options,
-                                 std::string prefix_token = "") {
+                                 std::string prefix_token) {
   uint64_t size = 0;
   ValueObjectSP location_sp;
-
   if (!ExtractLibcxxStringInfo(valobj, location_sp, size))
     return false;
-
   if (size == 0) {
     stream.Printf("\"\"");
     return true;
@@ -644,7 +654,9 @@ bool LibcxxStringSummaryProvider(ValueObject &valobj, Stream &stream,
       options.SetIsTruncated(true);
     }
   }
-  location_sp->GetPointeeData(extractor, 0, size);
+  const size_t bytes_read = location_sp->GetPointeeData(extractor, 0, size);
+  if (bytes_read < size)
+    return false;
 
   options.SetData(extractor);
   options.SetStream(&stream);
@@ -657,28 +669,40 @@ bool LibcxxStringSummaryProvider(ValueObject &valobj, Stream &stream,
   options.SetQuote('"');
   options.SetSourceSize(size);
   options.SetBinaryZeroIsTerminator(false);
-  StringPrinter::ReadBufferAndDumpToStream<element_type>(options);
+  return StringPrinter::ReadBufferAndDumpToStream<element_type>(options);
+}
 
+template <StringPrinter::StringElementType element_type>
+static bool formatStringImpl(ValueObject &valobj, Stream &stream,
+                             const TypeSummaryOptions &summary_options,
+                             std::string prefix_token) {
+  StreamString scratch_stream;
+  const bool success = LibcxxStringSummaryProvider<element_type>(
+      valobj, scratch_stream, summary_options, prefix_token);
+  if (success)
+    stream << scratch_stream.GetData();
+  else
+    stream << "Summary Unavailable";
   return true;
 }
 
 bool lldb_private::formatters::LibcxxStringSummaryProviderASCII(
     ValueObject &valobj, Stream &stream,
     const TypeSummaryOptions &summary_options) {
-  return LibcxxStringSummaryProvider<StringPrinter::StringElementType::ASCII>(
-      valobj, stream, summary_options);
+  return formatStringImpl<StringPrinter::StringElementType::ASCII>(
+      valobj, stream, summary_options, "");
 }
 
 bool lldb_private::formatters::LibcxxStringSummaryProviderUTF16(
     ValueObject &valobj, Stream &stream,
     const TypeSummaryOptions &summary_options) {
-  return LibcxxStringSummaryProvider<StringPrinter::StringElementType::UTF16>(
+  return formatStringImpl<StringPrinter::StringElementType::UTF16>(
       valobj, stream, summary_options, "u");
 }
 
 bool lldb_private::formatters::LibcxxStringSummaryProviderUTF32(
     ValueObject &valobj, Stream &stream,
     const TypeSummaryOptions &summary_options) {
-  return LibcxxStringSummaryProvider<StringPrinter::StringElementType::UTF32>(
+  return formatStringImpl<StringPrinter::StringElementType::UTF32>(
       valobj, stream, summary_options, "U");
 }

diff  --git a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/string/TestDataFormatterLibcxxString.py b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/string/TestDataFormatterLibcxxString.py
index 8943b2596680..f04ec6d402b3 100644
--- a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/string/TestDataFormatterLibcxxString.py
+++ b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/string/TestDataFormatterLibcxxString.py
@@ -51,6 +51,8 @@ def cleanup():
                 "settings set target.max-children-count 256",
                 check=False)
 
+        is_64_bit = self.process().GetAddressByteSize() == 8
+
         # Execute the cleanup function during test case tear down.
         self.addTearDownHook(cleanup)
 
@@ -114,3 +116,10 @@ def cleanup():
                 '(%s::basic_string<unsigned char, %s::char_traits<unsigned char>, '
                 '%s::allocator<unsigned char> >) uchar = "aaaaa"'%(ns,ns,ns),
         ])
+
+        if is_64_bit:
+            self.expect("frame variable garbage1", substrs=['garbage1 = Summary Unavailable'])
+            self.expect("frame variable garbage2", substrs=['garbage2 = Summary Unavailable'])
+            self.expect("frame variable garbage3", substrs=['garbage3 = Summary Unavailable'])
+            self.expect("frame variable garbage4", substrs=['garbage4 = Summary Unavailable'])
+            self.expect("frame variable garbage5", substrs=['garbage5 = Summary Unavailable'])

diff  --git a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/string/main.cpp b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/string/main.cpp
index afb56e67f0a5..05af8802aa1b 100644
--- a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/string/main.cpp
+++ b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/string/main.cpp
@@ -1,4 +1,58 @@
 #include <string>
+#include <stdint.h>
+
+// For more information about libc++'s std::string ABI, see:
+//
+//   https://joellaity.com/2020/01/31/string.html
+
+// A corrupt string which hits the SSO code path, but has an invalid size.
+static struct {
+  // Set the size of this short-mode string to 116. Note that in short mode,
+  // the size is encoded as `size << 1`.
+  unsigned char size = 232;
+
+  // 23 garbage bytes for the inline string payload.
+  char inline_buf[23] = {0};
+} garbage_string_short_mode;
+
+// A corrupt libcxx string in long mode with a payload that contains a utf8
+// sequence that's inherently too long.
+static unsigned char garbage_utf8_payload1[] = {
+  250 // This means that we expect a 5-byte sequence, this is invalid.
+};
+static struct {
+  uint64_t cap = 5;
+  uint64_t size = 4;
+  unsigned char *data = &garbage_utf8_payload1[0];
+} garbage_string_long_mode1;
+
+// A corrupt libcxx string in long mode with a payload that contains a utf8
+// sequence that's too long to fit in the buffer.
+static unsigned char garbage_utf8_payload2[] = {
+  240 // This means that we expect a 4-byte sequence, but the buffer is too
+      // small for this.
+};
+static struct {
+  uint64_t cap = 3;
+  uint64_t size = 2;
+  unsigned char *data = &garbage_utf8_payload1[0];
+} garbage_string_long_mode2;
+
+// A corrupt libcxx string which has an invalid size (i.e. a size greater than
+// the capacity of the string).
+static struct {
+  uint64_t cap = 5;
+  uint64_t size = 7;
+  const char *data = "foo";
+} garbage_string_long_mode3;
+
+// A corrupt libcxx string in long mode with a payload that would trigger a
+// buffer overflow.
+static struct {
+  uint64_t cap = 5;
+  uint64_t size = 2;
+  uint64_t data = 0xfffffffffffffffeULL;
+} garbage_string_long_mode4;
 
 int main()
 {
@@ -17,6 +71,23 @@ int main()
     std::u32string u32_string(U"🍄🍅🍆🍌");
     std::u32string u32_empty(U"");
     std::basic_string<unsigned char> uchar(5, 'a');
+
+#if _LIBCPP_ABI_VERSION == 1
+    std::string garbage1, garbage2, garbage3, garbage4, garbage5;
+    if (sizeof(std::string) == sizeof(garbage_string_short_mode))
+      memcpy((void *)&garbage1, &garbage_string_short_mode, sizeof(std::string));
+    if (sizeof(std::string) == sizeof(garbage_string_long_mode1))
+      memcpy((void *)&garbage2, &garbage_string_long_mode1, sizeof(std::string));
+    if (sizeof(std::string) == sizeof(garbage_string_long_mode2))
+      memcpy((void *)&garbage3, &garbage_string_long_mode2, sizeof(std::string));
+    if (sizeof(std::string) == sizeof(garbage_string_long_mode3))
+      memcpy((void *)&garbage4, &garbage_string_long_mode3, sizeof(std::string));
+    if (sizeof(std::string) == sizeof(garbage_string_long_mode4))
+      memcpy((void *)&garbage5, &garbage_string_long_mode4, sizeof(std::string));
+#else
+#error "Test potentially needs to be updated for a new std::string ABI."
+#endif
+
     S.assign(L"!!!!!"); // Set break point at this line.
     return 0;
 }


        


More information about the lldb-commits mailing list