[lldb-dev] [patch] char8_t support (plus dlang UTF8/16/32)

James Blachly via lldb-dev lldb-dev at lists.llvm.org
Sun Aug 18 18:34:26 PDT 2019


Dear LLDB developers:

I have added support for C++20 char8_t, as well as support for dlang's char/wchar/dchar types. As I am not a professional developer, and the submission-review-merge process for LLVM projects seems somewhat byzantine, I wanted to offer this up on the list in the hopes that others find it useful and someone will be able to integrate it.

kind regards
James


Using an example program that defines each of the Unicode types both as a single character and as a string, we see a major improvement.

BEFORE:
(lldb) frame v
error: need to add support for DW_TAG_base_type 'char8_t' encoded with DW_ATE = 0x10, bit_size = 8
(void) c8 = <Unable to determine byte size.>

(char16_t) c16 = U+0000 u'\0'
(char32_t) c32 = U+0x00007fff U'翿'
(void [11]) str8 = ([0] = <Unable to determine byte size.>, [1] = <Unable to determine byte size.>, [2] = <Unable to determine byte size.>, [3] = <Unable to determine byte size.>, [4] = <Unable to determine byte size.>, [5] = <Unable to determine byte size.>, [6] = <Unable to determine byte size.>, [7] = <Unable to determine byte size.>, [8] = <Unable to determine byte size.>, [9] = <Unable to determine byte size.>, [10] = <Unable to determine byte size.>)
(void *) str8ptr = 0x00007fffffffe3d9
(char16_t [12]) str16 = u"Hello UTF16"
(char16_t *) str16ptr = 0x00007fffffffe3b0 u"Hello UTF16"
(char32_t [12]) str32 = U"Hello UTF32"
(char32_t *) str32ptr = 0x00007fffffffe370 U"Hello UTF32"

AFTER:
(lldb) frame v
(char8_t) c8 = 0x00 u8'\0'
(char16_t) c16 = U+0000 u'\0'
(char32_t) c32 = U+0x00007fff U'翿'
(char8_t [11]) str8 = u8"Hello UTF8"
(char8_t *) str8ptr = 0x00007fffffffe3c9 u8"Hello UTF8"
(char16_t [12]) str16 = u"Hello UTF16"
(char16_t *) str16ptr = 0x00007fffffffe3a0 u"Hello UTF16"
(char32_t [12]) str32 = U"Hello UTF32"
(char32_t *) str32ptr = 0x00007fffffffe360 U"Hello UTF32"




diff --git a/include/lldb/lldb-enumerations.h b/include/lldb/lldb-enumerations.h
index f9830c04b..e7189dc9d 100644
--- a/include/lldb/lldb-enumerations.h
+++ b/include/lldb/lldb-enumerations.h
@@ -167,6 +167,7 @@ enum Format {
   eFormatOctal,
   eFormatOSType, // OS character codes encoded into an integer 'PICT' 'text'
                  // etc...
+  eFormatUnicode8,
   eFormatUnicode16,
   eFormatUnicode32,
   eFormatUnsigned,
diff --git a/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp b/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp
index 0b3c31816..15e0a82bd 100644
--- a/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp
+++ b/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp
@@ -853,6 +853,14 @@ static void LoadSystemFormatters(lldb::TypeCategoryImplSP cpp_category_sp) {
 
   // FIXME because of a bug in the FormattersContainer we need to add a summary
   // for both X* and const X* (<rdar://problem/12717717>)
+  AddCXXSummary(
+      cpp_category_sp, lldb_private::formatters::Char8StringSummaryProvider,
+      "char8_t * summary provider", ConstString("char8_t *"), string_flags);
+  AddCXXSummary(cpp_category_sp,
+                lldb_private::formatters::Char8StringSummaryProvider,
+                "char8_t [] summary provider",
+                ConstString("char8_t \\[[0-9]+\\]"), string_array_flags, true);
+
   AddCXXSummary(
       cpp_category_sp, lldb_private::formatters::Char16StringSummaryProvider,
       "char16_t * summary provider", ConstString("char16_t *"), string_flags);
@@ -890,6 +898,9 @@ static void LoadSystemFormatters(lldb::TypeCategoryImplSP cpp_category_sp) {
       .SetHideItemNames(true)
       .SetShowMembersOneLiner(false);
 
+  AddCXXSummary(
+      cpp_category_sp, lldb_private::formatters::Char8SummaryProvider,
+      "char8_t summary provider", ConstString("char8_t"), widechar_flags);
   AddCXXSummary(
       cpp_category_sp, lldb_private::formatters::Char16SummaryProvider,
       "char16_t summary provider", ConstString("char16_t"), widechar_flags);
diff --git a/source/Plugins/Language/CPlusPlus/CxxStringTypes.cpp b/source/Plugins/Language/CPlusPlus/CxxStringTypes.cpp
index 959079070..3ea7589d8 100644
--- a/source/Plugins/Language/CPlusPlus/CxxStringTypes.cpp
+++ b/source/Plugins/Language/CPlusPlus/CxxStringTypes.cpp
@@ -32,6 +32,31 @@ using namespace lldb;
 using namespace lldb_private;
 using namespace lldb_private::formatters;
 
+bool lldb_private::formatters::Char8StringSummaryProvider(
+    ValueObject &valobj, Stream &stream, const TypeSummaryOptions &) {
+  ProcessSP process_sp = valobj.GetProcessSP();
+  if (!process_sp)
+    return false; // no process is associated with this value
+
+  lldb::addr_t valobj_addr = GetArrayAddressOrPointerValue(valobj);
+  if (valobj_addr == 0 || valobj_addr == LLDB_INVALID_ADDRESS)
+    return false; // null or invalid address: no summary
+
+  StringPrinter::ReadStringAndDumpToStreamOptions options(valobj);
+  options.SetLocation(valobj_addr);
+  options.SetProcessSP(process_sp);
+  options.SetStream(&stream);
+  options.SetPrefixToken("u8"); // literal prefix, as in u8"..."
+
+  if (!StringPrinter::ReadStringAndDumpToStream<
+          StringPrinter::StringElementType::UTF8>(options)) {
+    stream.Printf("Summary Unavailable");
+    return true; // placeholder text was written, so still report success
+  }
+
+  return true;
+}
+
 bool lldb_private::formatters::Char16StringSummaryProvider(
     ValueObject &valobj, Stream &stream, const TypeSummaryOptions &) {
   ProcessSP process_sp = valobj.GetProcessSP();
@@ -128,6 +153,32 @@ bool lldb_private::formatters::WCharStringSummaryProvider(
   return true;
 }
 
+bool lldb_private::formatters::Char8SummaryProvider(
+    ValueObject &valobj, Stream &stream, const TypeSummaryOptions &) {
+  DataExtractor data;
+  Status error;
+  valobj.GetData(data, error);
+
+  if (error.Fail())
+    return false; // GetData reported an error
+
+  std::string value;
+  valobj.GetValueAsCString(lldb::eFormatUnicode8, value);
+  if (!value.empty())
+    stream.Printf("%s ", value.c_str()); // formatted value, then a space
+
+  StringPrinter::ReadBufferAndDumpToStreamOptions options(valobj);
+  options.SetData(data);
+  options.SetStream(&stream);
+  options.SetPrefixToken("u8"); // literal prefix, as in u8'...'
+  options.SetQuote('\''); // single quotes: one character, not a string
+  options.SetSourceSize(1); // NOTE(review): presumably one element - confirm
+  options.SetBinaryZeroIsTerminator(false);
+
+  return StringPrinter::ReadBufferAndDumpToStream<
+      StringPrinter::StringElementType::UTF8>(options);
+}
+
 bool lldb_private::formatters::Char16SummaryProvider(
     ValueObject &valobj, Stream &stream, const TypeSummaryOptions &) {
   DataExtractor data;
diff --git a/source/Plugins/Language/CPlusPlus/CxxStringTypes.h b/source/Plugins/Language/CPlusPlus/CxxStringTypes.h
index 92bef2382..92a01e410 100644
--- a/source/Plugins/Language/CPlusPlus/CxxStringTypes.h
+++ b/source/Plugins/Language/CPlusPlus/CxxStringTypes.h
@@ -16,6 +16,10 @@
 
 namespace lldb_private {
 namespace formatters {
+bool Char8StringSummaryProvider(
+    ValueObject &valobj, Stream &stream,
+    const TypeSummaryOptions &options); // char8_t*
+
 bool Char16StringSummaryProvider(
     ValueObject &valobj, Stream &stream,
     const TypeSummaryOptions &options); // char16_t* and unichar*
@@ -27,6 +31,10 @@ bool Char32StringSummaryProvider(
 bool WCharStringSummaryProvider(ValueObject &valobj, Stream &stream,
                                 const TypeSummaryOptions &options); // wchar_t*
 
+bool Char8SummaryProvider(
+    ValueObject &valobj, Stream &stream,
+    const TypeSummaryOptions &options); // char8_t
+
 bool Char16SummaryProvider(
     ValueObject &valobj, Stream &stream,
     const TypeSummaryOptions &options); // char16_t and unichar
diff --git a/source/Symbol/ClangASTContext.cpp b/source/Symbol/ClangASTContext.cpp
index d07adebb8..570bbb7f2 100644
--- a/source/Symbol/ClangASTContext.cpp
+++ b/source/Symbol/ClangASTContext.cpp
@@ -1377,11 +1377,24 @@ CompilerType ClangASTContext::GetBuiltinTypeForDWARFEncodingAndBitSize(
       break;
 
     case DW_ATE_UTF:
+      // char8_t is C++20; char, wchar and dchar (plus their const() and
+      // immutable() variants) are dlang. All are matched here by type name.
       if (type_name) {
-        if (streq(type_name, "char16_t")) {
-          return CompilerType(this, ast->Char16Ty.getAsOpaquePtr());
-        } else if (streq(type_name, "char32_t")) {
-          return CompilerType(this, ast->Char32Ty.getAsOpaquePtr());
+        if (streq(type_name, "char16_t") ||
+                        streq(type_name, "wchar") ||  // dlang
+                        streq(type_name, "const(wchar)") ||
+                        streq(type_name, "immutable(wchar)")) {
+          return CompilerType(ast, ast->Char16Ty);
+        } else if (streq(type_name, "char32_t") ||
+                        streq(type_name, "dchar") ||  // dlang
+                        streq(type_name, "const(dchar)") ||
+                        streq(type_name, "immutable(dchar)")) {
+          return CompilerType(ast, ast->Char32Ty);
+        } else if (streq(type_name, "char8_t") ||  // C++20
+                        streq(type_name, "char") || // dlang
+                        streq(type_name, "const(char)") ||
+                        streq(type_name, "immutable(char)")) {
+          return CompilerType(ast, ast->Char8Ty);
         }
       }
       break;




More information about the lldb-dev mailing list