[lldb-dev] [patch] char8_t support (plus dlang UTF8/16/32)

Jonas Devlieghere via lldb-dev lldb-dev at lists.llvm.org
Mon Aug 19 15:14:54 PDT 2019


Hi James,

Thanks for working on this. I've opened a code review for your patch:
https://reviews.llvm.org/D66447

I had to make some modifications for it to compile, and I added a test.

Cheers,
Jonas

On Sun, Aug 18, 2019 at 6:34 PM James Blachly via lldb-dev
<lldb-dev at lists.llvm.org> wrote:
>
> Dear LLDB developers:
>
> I have added support for C++20 char8_t, as well as support for dlang's char/wchar/dchar types. As I am not a professional developer, and the submission-review-merge process for LLVM projects seems somewhat byzantine, I wanted to offer this up on the list in the hopes that others find it useful and someone will be able to integrate it.
>
> kind regards
> James
>
>
> Using an example program that defines each of the Unicode types both as a single character and as a string, we see a major improvement.
>
> BEFORE:
> (lldb) frame v
> error: need to add support for DW_TAG_base_type 'char8_t' encoded with DW_ATE = 0x10, bit_size = 8
> (void) c8 = <Unable to determine byte size.>
>
> (char16_t) c16 = U+0000 u'\0'
> (char32_t) c32 = U+0x00007fff U'翿'
> (void [11]) str8 = ([0] = <Unable to determine byte size.>, [1] = <Unable to determine byte size.>, [2] = <Unable to determine byte size.>, [3] = <Unable to determine byte size.>, [4] = <Unable to determine byte size.>, [5] = <Unable to determine byte size.>, [6] = <Unable to determine byte size.>, [7] = <Unable to determine byte size.>, [8] = <Unable to determine byte size.>, [9] = <Unable to determine byte size.>, [10] = <Unable to determine byte size.>)
> (void *) str8ptr = 0x00007fffffffe3d9
> (char16_t [12]) str16 = u"Hello UTF16"
> (char16_t *) str16ptr = 0x00007fffffffe3b0 u"Hello UTF16"
> (char32_t [12]) str32 = U"Hello UTF32"
> (char32_t *) str32ptr = 0x00007fffffffe370 U"Hello UTF32"
>
> AFTER:
> (lldb) frame v
> (char8_t) c8 = 0x00 u8'\0'
> (char16_t) c16 = U+0000 u'\0'
> (char32_t) c32 = U+0x00007fff U'翿'
> (char8_t [11]) str8 = u8"Hello UTF8"
> (char8_t *) str8ptr = 0x00007fffffffe3c9 u8"Hello UTF8"
> (char16_t [12]) str16 = u"Hello UTF16"
> (char16_t *) str16ptr = 0x00007fffffffe3a0 u"Hello UTF16"
> (char32_t [12]) str32 = U"Hello UTF32"
> (char32_t *) str32ptr = 0x00007fffffffe360 U"Hello UTF32"
>
>
>
>
> diff --git a/include/lldb/lldb-enumerations.h b/include/lldb/lldb-enumerations.h
> index f9830c04b..e7189dc9d 100644
> --- a/include/lldb/lldb-enumerations.h
> +++ b/include/lldb/lldb-enumerations.h
> @@ -167,6 +167,7 @@ enum Format {
>    eFormatOctal,
>    eFormatOSType, // OS character codes encoded into an integer 'PICT' 'text'
>                   // etc...
> +  eFormatUnicode8,
>    eFormatUnicode16,
>    eFormatUnicode32,
>    eFormatUnsigned,
> diff --git a/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp b/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp
> index 0b3c31816..15e0a82bd 100644
> --- a/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp
> +++ b/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp
> @@ -853,6 +853,14 @@ static void LoadSystemFormatters(lldb::TypeCategoryImplSP cpp_category_sp) {
>
>    // FIXME because of a bug in the FormattersContainer we need to add a summary
>    // for both X* and const X* (<rdar://problem/12717717>)
> +  AddCXXSummary(
> +      cpp_category_sp, lldb_private::formatters::Char8StringSummaryProvider,
> +      "char8_t * summary provider", ConstString("char8_t *"), string_flags);
> +  AddCXXSummary(cpp_category_sp,
> +                lldb_private::formatters::Char8StringSummaryProvider,
> +                "char8_t [] summary provider",
> +                ConstString("char8_t \\[[0-9]+\\]"), string_array_flags, true);
> +
>    AddCXXSummary(
>        cpp_category_sp, lldb_private::formatters::Char16StringSummaryProvider,
>        "char16_t * summary provider", ConstString("char16_t *"), string_flags);
> @@ -890,6 +898,9 @@ static void LoadSystemFormatters(lldb::TypeCategoryImplSP cpp_category_sp) {
>        .SetHideItemNames(true)
>        .SetShowMembersOneLiner(false);
>
> +  AddCXXSummary(
> +      cpp_category_sp, lldb_private::formatters::Char8SummaryProvider,
> +      "char8_t summary provider", ConstString("char8_t"), widechar_flags);
>    AddCXXSummary(
>        cpp_category_sp, lldb_private::formatters::Char16SummaryProvider,
>        "char16_t summary provider", ConstString("char16_t"), widechar_flags);
> diff --git a/source/Plugins/Language/CPlusPlus/CxxStringTypes.cpp b/source/Plugins/Language/CPlusPlus/CxxStringTypes.cpp
> index 959079070..3ea7589d8 100644
> --- a/source/Plugins/Language/CPlusPlus/CxxStringTypes.cpp
> +++ b/source/Plugins/Language/CPlusPlus/CxxStringTypes.cpp
> @@ -32,6 +32,31 @@ using namespace lldb;
>  using namespace lldb_private;
>  using namespace lldb_private::formatters;
>
> +bool lldb_private::formatters::Char8StringSummaryProvider(
> +    ValueObject &valobj, Stream &stream, const TypeSummaryOptions &) {
> +  ProcessSP process_sp = valobj.GetProcessSP();
> +  if (!process_sp)
> +    return false;
> +
> +  lldb::addr_t valobj_addr = GetArrayAddressOrPointerValue(valobj);
> +  if (valobj_addr == 0 || valobj_addr == LLDB_INVALID_ADDRESS)
> +    return false;
> +
> +  StringPrinter::ReadStringAndDumpToStreamOptions options(valobj);
> +  options.SetLocation(valobj_addr);
> +  options.SetProcessSP(process_sp);
> +  options.SetStream(&stream);
> +  options.SetPrefixToken("u8");
> +
> +  if (!StringPrinter::ReadStringAndDumpToStream<
> +          StringPrinter::StringElementType::UTF8>(options)) {
> +    stream.Printf("Summary Unavailable");
> +    return true;
> +  }
> +
> +  return true;
> +}
> +
>  bool lldb_private::formatters::Char16StringSummaryProvider(
>      ValueObject &valobj, Stream &stream, const TypeSummaryOptions &) {
>    ProcessSP process_sp = valobj.GetProcessSP();
> @@ -128,6 +153,32 @@ bool lldb_private::formatters::WCharStringSummaryProvider(
>    return true;
>  }
>
> +bool lldb_private::formatters::Char8SummaryProvider(
> +    ValueObject &valobj, Stream &stream, const TypeSummaryOptions &) {
> +  DataExtractor data;
> +  Status error;
> +  valobj.GetData(data, error);
> +
> +  if (error.Fail())
> +    return false;
> +
> +  std::string value;
> +  valobj.GetValueAsCString(lldb::eFormatUnicode8, value);
> +  if (!value.empty())
> +    stream.Printf("%s ", value.c_str());
> +
> +  StringPrinter::ReadBufferAndDumpToStreamOptions options(valobj);
> +  options.SetData(data);
> +  options.SetStream(&stream);
> +  options.SetPrefixToken("u8");
> +  options.SetQuote('\'');
> +  options.SetSourceSize(1);
> +  options.SetBinaryZeroIsTerminator(false);
> +
> +  return StringPrinter::ReadBufferAndDumpToStream<
> +      StringPrinter::StringElementType::UTF8>(options);
> +}
> +
>  bool lldb_private::formatters::Char16SummaryProvider(
>      ValueObject &valobj, Stream &stream, const TypeSummaryOptions &) {
>    DataExtractor data;
> diff --git a/source/Plugins/Language/CPlusPlus/CxxStringTypes.h b/source/Plugins/Language/CPlusPlus/CxxStringTypes.h
> index 92bef2382..92a01e410 100644
> --- a/source/Plugins/Language/CPlusPlus/CxxStringTypes.h
> +++ b/source/Plugins/Language/CPlusPlus/CxxStringTypes.h
> @@ -16,6 +16,10 @@
>
>  namespace lldb_private {
>  namespace formatters {
> +bool Char8StringSummaryProvider(
> +    ValueObject &valobj, Stream &stream,
> +    const TypeSummaryOptions &options); // char8_t*
> +
>  bool Char16StringSummaryProvider(
>      ValueObject &valobj, Stream &stream,
>      const TypeSummaryOptions &options); // char16_t* and unichar*
> @@ -27,6 +31,10 @@ bool Char32StringSummaryProvider(
>  bool WCharStringSummaryProvider(ValueObject &valobj, Stream &stream,
>                                  const TypeSummaryOptions &options); // wchar_t*
>
> +bool Char8SummaryProvider(
> +    ValueObject &valobj, Stream &stream,
> +    const TypeSummaryOptions &options); // char8_t
> +
>  bool Char16SummaryProvider(
>      ValueObject &valobj, Stream &stream,
>      const TypeSummaryOptions &options); // char16_t and unichar
> diff --git a/source/Symbol/ClangASTContext.cpp b/source/Symbol/ClangASTContext.cpp
> index d07adebb8..570bbb7f2 100644
> --- a/source/Symbol/ClangASTContext.cpp
> +++ b/source/Symbol/ClangASTContext.cpp
> @@ -1377,11 +1377,24 @@ CompilerType ClangASTContext::GetBuiltinTypeForDWARFEncodingAndBitSize(
>        break;
>
>      case DW_ATE_UTF:
> +    // char8_t is C++20
> +    // char, wchar, dchar plus const() and immutable() variants are dlang
>        if (type_name) {
> -        if (streq(type_name, "char16_t")) {
> -          return CompilerType(this, ast->Char16Ty.getAsOpaquePtr());
> -        } else if (streq(type_name, "char32_t")) {
> -          return CompilerType(this, ast->Char32Ty.getAsOpaquePtr());
> +        if (streq(type_name, "char16_t") ||
> +                        streq(type_name, "wchar") ||  // dlang
> +                        streq(type_name, "const(wchar)") ||
> +                        streq(type_name, "immutable(wchar)")) {
> +          return CompilerType(ast, ast->Char16Ty);
> +        } else if (streq(type_name, "char32_t") ||
> +                        streq(type_name, "dchar") ||  // dlang
> +                        streq(type_name, "const(dchar)") ||
> +                        streq(type_name, "immutable(dchar)")) {
> +          return CompilerType(ast, ast->Char32Ty);
> +        } else if (streq(type_name, "char8_t") ||  // C++20
> +                        streq(type_name, "char") || // dlang
> +                        streq(type_name, "const(char)") ||
> +                        streq(type_name, "immutable(char)")) {
> +          return CompilerType(ast, ast->Char8Ty);
>          }
>        }
>        break;
>
>
> _______________________________________________
> lldb-dev mailing list
> lldb-dev at lists.llvm.org
> https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-dev


More information about the lldb-dev mailing list