[cfe-commits] r154069 - in /cfe/trunk: lib/AST/StmtPrinter.cpp test/SemaCXX/constexpr-printing.cpp test/SemaCXX/static-assert.cpp

Richard Smith richard-llvm at metafoo.co.uk
Wed Apr 4 17:17:45 PDT 2012


Author: rsmith
Date: Wed Apr  4 19:17:44 2012
New Revision: 154069

URL: http://llvm.org/viewvc/llvm-project?rev=154069&view=rev
Log:
Fix assertions and wrong output from StmtPrinter's string literal printing.
String literals (including unicode ones) can contain non-Unicode codepoints
if they were written using \x or similar. Write those out using \x, but be
careful that the following character can't be misinterpreted as part of the
\x escape sequence. Convert UTF-16 surrogate pairs back to codepoints before
rendering them.

Modified:
    cfe/trunk/lib/AST/StmtPrinter.cpp
    cfe/trunk/test/SemaCXX/constexpr-printing.cpp
    cfe/trunk/test/SemaCXX/static-assert.cpp

Modified: cfe/trunk/lib/AST/StmtPrinter.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/AST/StmtPrinter.cpp?rev=154069&r1=154068&r2=154069&view=diff
==============================================================================
--- cfe/trunk/lib/AST/StmtPrinter.cpp (original)
+++ cfe/trunk/lib/AST/StmtPrinter.cpp Wed Apr  4 19:17:44 2012
@@ -727,12 +727,40 @@
   OS << '"';
   static char Hex[] = "0123456789ABCDEF";
 
+  unsigned LastSlashX = Str->getLength();
   for (unsigned I = 0, N = Str->getLength(); I != N; ++I) {
     switch (uint32_t Char = Str->getCodeUnit(I)) {
     default:
-      // FIXME: Is this the best way to print wchar_t?
+      // FIXME: Convert UTF-8 back to codepoints before rendering.
+
+      // Convert UTF-16 surrogate pairs back to codepoints before rendering.
+      // Leave invalid surrogates alone; we'll use \x for those.
+      if (Str->getKind() == StringLiteral::UTF16 && I != N - 1 &&
+          Char >= 0xd800 && Char <= 0xdbff) {
+        uint32_t Trail = Str->getCodeUnit(I + 1);
+        if (Trail >= 0xdc00 && Trail <= 0xdfff) {
+          Char = 0x10000 + ((Char - 0xd800) << 10) + (Trail - 0xdc00);
+          ++I;
+        }
+      }
+
       if (Char > 0xff) {
-        assert(Char <= 0x10ffff && "invalid unicode codepoint");
+        // If this is a wide string, output characters over 0xff using \x
+        // escapes. Otherwise, this is a UTF-16 or UTF-32 string, and Char is a
+        // codepoint: use \x escapes for invalid codepoints.
+        if (Str->getKind() == StringLiteral::Wide ||
+            (Char >= 0xd800 && Char <= 0xdfff) || Char >= 0x110000) {
+          // FIXME: Is this the best way to print wchar_t?
+          OS << "\\x";
+          int Shift = 28;
+          while ((Char >> Shift) == 0)
+            Shift -= 4;
+          for (/**/; Shift >= 0; Shift -= 4)
+            OS << Hex[(Char >> Shift) & 15];
+          LastSlashX = I;
+          break;
+        }
+
         if (Char > 0xffff)
           OS << "\\U00"
              << Hex[(Char >> 20) & 15]
@@ -745,13 +773,26 @@
            << Hex[(Char >>  0) & 15];
         break;
       }
+
+      // If we used \x... for the previous character, and this character is a
+      // hexadecimal digit, prevent it being slurped as part of the \x.
+      if (LastSlashX + 1 == I) {
+        switch (Char) {
+          case '0': case '1': case '2': case '3': case '4':
+          case '5': case '6': case '7': case '8': case '9':
+          case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
+          case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
+            OS << "\"\"";
+        }
+      }
+
       if (Char <= 0xff && isprint(Char))
         OS << (char)Char;
       else  // Output anything hard as an octal escape.
         OS << '\\'
-        << (char)('0'+ ((Char >> 6) & 7))
-        << (char)('0'+ ((Char >> 3) & 7))
-        << (char)('0'+ ((Char >> 0) & 7));
+           << (char)('0' + ((Char >> 6) & 7))
+           << (char)('0' + ((Char >> 3) & 7))
+           << (char)('0' + ((Char >> 0) & 7));
       break;
     // Handle some common non-printable cases to make dumps prettier.
     case '\\': OS << "\\\\"; break;

Modified: cfe/trunk/test/SemaCXX/constexpr-printing.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/SemaCXX/constexpr-printing.cpp?rev=154069&r1=154068&r2=154069&view=diff
==============================================================================
--- cfe/trunk/test/SemaCXX/constexpr-printing.cpp (original)
+++ cfe/trunk/test/SemaCXX/constexpr-printing.cpp Wed Apr  4 19:17:44 2012
@@ -85,8 +85,8 @@
   expected-error {{}} expected-note {{u"test\000\\\"\t\a\b\234\u1234"}}
 constexpr char32_t c32 = get(U"test\0\\\"\t\a\b\234\u1234\U0010ffff"); // \
   expected-error {{}} expected-note {{U"test\000\\\"\t\a\b\234\u1234\U0010FFFF"}}
-constexpr wchar_t wc = get(L"test\0\\\"\t\a\b\234\u1234"); // \
-  expected-error {{}} expected-note {{L"test\000\\\"\t\a\b\234\u1234"}}
+constexpr wchar_t wc = get(L"test\0\\\"\t\a\b\234\u1234\xffffffff"); // \
+  expected-error {{}} expected-note {{L"test\000\\\"\t\a\b\234\x1234\xFFFFFFFF"}}
 
 constexpr char32_t c32_err = get(U"\U00110000"); // expected-error {{invalid universal character}}
 

Modified: cfe/trunk/test/SemaCXX/static-assert.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/SemaCXX/static-assert.cpp?rev=154069&r1=154068&r2=154069&view=diff
==============================================================================
--- cfe/trunk/test/SemaCXX/static-assert.cpp (original)
+++ cfe/trunk/test/SemaCXX/static-assert.cpp Wed Apr  4 19:17:44 2012
@@ -27,3 +27,10 @@
 
 S<char> s1; // expected-note {{in instantiation of template class 'S<char>' requested here}}
 S<int> s2;
+
+static_assert(false, L"\xFFFFFFFF"); // expected-error {{static_assert failed L"\xFFFFFFFF"}}
+static_assert(false, u"\U000317FF"); // expected-error {{static_assert failed u"\U000317FF"}}
+// FIXME: render this as u8"\u03A9"
+static_assert(false, u8"Ω"); // expected-error {{static_assert failed u8"\316\251"}}
+static_assert(false, L"\u1234"); // expected-error {{static_assert failed L"\x1234"}}
+static_assert(false, L"\x1ff" "0\x123" "fx\xfffff" "goop"); // expected-error {{static_assert failed L"\x1FF""0\x123""fx\xFFFFFgoop"}}





More information about the cfe-commits mailing list