[cfe-commits] r158417 - in /cfe/trunk: include/clang/AST/Expr.h lib/AST/Expr.cpp lib/AST/StmtDumper.cpp lib/AST/StmtPrinter.cpp test/Misc/ast-dump-wchar.cpp
Richard Trieu
rtrieu at google.com
Wed Jun 13 13:25:25 PDT 2012
Author: rtrieu
Date: Wed Jun 13 15:25:24 2012
New Revision: 158417
URL: http://llvm.org/viewvc/llvm-project?rev=158417&view=rev
Log:
Moved the StringLiteral printing code from StmtPrinter into the StringLiteral
class, and made both StmtPrinter and StmtDumper call it. This fixes an
assertion failure when dumping wide (wchar_t) string literals.
Added:
cfe/trunk/test/Misc/ast-dump-wchar.cpp
Modified:
cfe/trunk/include/clang/AST/Expr.h
cfe/trunk/lib/AST/Expr.cpp
cfe/trunk/lib/AST/StmtDumper.cpp
cfe/trunk/lib/AST/StmtPrinter.cpp
Modified: cfe/trunk/include/clang/AST/Expr.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/AST/Expr.h?rev=158417&r1=158416&r2=158417&view=diff
==============================================================================
--- cfe/trunk/include/clang/AST/Expr.h (original)
+++ cfe/trunk/include/clang/AST/Expr.h Wed Jun 13 15:25:24 2012
@@ -1399,6 +1399,8 @@
getByteLength());
}
+ void outputString(raw_ostream &OS);
+
uint32_t getCodeUnit(size_t i) const {
assert(i < Length && "out of bounds access");
if (CharByteWidth == 1)
Modified: cfe/trunk/lib/AST/Expr.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/AST/Expr.cpp?rev=158417&r1=158416&r2=158417&view=diff
==============================================================================
--- cfe/trunk/lib/AST/Expr.cpp (original)
+++ cfe/trunk/lib/AST/Expr.cpp Wed Jun 13 15:25:24 2012
@@ -633,6 +633,99 @@
return SL;
}
+void StringLiteral::outputString(raw_ostream &OS) {
+ switch (getKind()) {
+ case Ascii: break; // no prefix.
+ case Wide: OS << 'L'; break;
+ case UTF8: OS << "u8"; break;
+ case UTF16: OS << 'u'; break;
+ case UTF32: OS << 'U'; break;
+ }
+ OS << '"';
+ static const char Hex[] = "0123456789ABCDEF";
+
+ unsigned LastSlashX = getLength();
+ for (unsigned I = 0, N = getLength(); I != N; ++I) {
+ switch (uint32_t Char = getCodeUnit(I)) {
+ default:
+ // FIXME: Convert UTF-8 back to codepoints before rendering.
+
+ // Convert UTF-16 surrogate pairs back to codepoints before rendering.
+ // Leave invalid surrogates alone; we'll use \x for those.
+ if (getKind() == UTF16 && I != N - 1 && Char >= 0xd800 &&
+ Char <= 0xdbff) {
+ uint32_t Trail = getCodeUnit(I + 1);
+ if (Trail >= 0xdc00 && Trail <= 0xdfff) {
+ Char = 0x10000 + ((Char - 0xd800) << 10) + (Trail - 0xdc00);
+ ++I;
+ }
+ }
+
+ if (Char > 0xff) {
+ // If this is a wide string, output characters over 0xff using \x
+ // escapes. Otherwise, this is a UTF-16 or UTF-32 string, and Char is a
+ // codepoint: use \x escapes for invalid codepoints.
+ if (getKind() == Wide ||
+ (Char >= 0xd800 && Char <= 0xdfff) || Char >= 0x110000) {
+ // FIXME: Is this the best way to print wchar_t?
+ OS << "\\x";
+ int Shift = 28;
+ while ((Char >> Shift) == 0)
+ Shift -= 4;
+ for (/**/; Shift >= 0; Shift -= 4)
+ OS << Hex[(Char >> Shift) & 15];
+ LastSlashX = I;
+ break;
+ }
+
+ if (Char > 0xffff)
+ OS << "\\U00"
+ << Hex[(Char >> 20) & 15]
+ << Hex[(Char >> 16) & 15];
+ else
+ OS << "\\u";
+ OS << Hex[(Char >> 12) & 15]
+ << Hex[(Char >> 8) & 15]
+ << Hex[(Char >> 4) & 15]
+ << Hex[(Char >> 0) & 15];
+ break;
+ }
+
+ // If we used \x... for the previous character, and this character is a
+ // hexadecimal digit, prevent it being slurped as part of the \x.
+ if (LastSlashX + 1 == I) {
+ switch (Char) {
+ case '0': case '1': case '2': case '3': case '4':
+ case '5': case '6': case '7': case '8': case '9':
+ case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
+ case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
+ OS << "\"\"";
+ }
+ }
+
+ assert(Char <= 0xff &&
+ "Characters above 0xff should already have been handled.");
+
+ if (isprint(Char))
+ OS << (char)Char;
+ else // Output anything hard as an octal escape.
+ OS << '\\'
+ << (char)('0' + ((Char >> 6) & 7))
+ << (char)('0' + ((Char >> 3) & 7))
+ << (char)('0' + ((Char >> 0) & 7));
+ break;
+ // Handle some common non-printable cases to make dumps prettier.
+ case '\\': OS << "\\\\"; break;
+ case '"': OS << "\\\""; break;
+ case '\n': OS << "\\n"; break;
+ case '\t': OS << "\\t"; break;
+ case '\a': OS << "\\a"; break;
+ case '\b': OS << "\\b"; break;
+ }
+ }
+ OS << '"';
+}
+
void StringLiteral::setString(ASTContext &C, StringRef Str,
StringKind Kind, bool IsPascal) {
//FIXME: we assume that the string data comes from a target that uses the same
Modified: cfe/trunk/lib/AST/StmtDumper.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/AST/StmtDumper.cpp?rev=158417&r1=158416&r2=158417&view=diff
==============================================================================
--- cfe/trunk/lib/AST/StmtDumper.cpp (original)
+++ cfe/trunk/lib/AST/StmtDumper.cpp Wed Jun 13 15:25:24 2012
@@ -446,18 +446,8 @@
void StmtDumper::VisitStringLiteral(StringLiteral *Str) {
DumpExpr(Str);
- // FIXME: this doesn't print wstrings right.
OS << " ";
- switch (Str->getKind()) {
- case StringLiteral::Ascii: break; // No prefix
- case StringLiteral::Wide: OS << 'L'; break;
- case StringLiteral::UTF8: OS << "u8"; break;
- case StringLiteral::UTF16: OS << 'u'; break;
- case StringLiteral::UTF32: OS << 'U'; break;
- }
- OS << '"';
- OS.write_escaped(Str->getString());
- OS << '"';
+ Str->outputString(OS);
}
void StmtDumper::VisitUnaryOperator(UnaryOperator *Node) {
Modified: cfe/trunk/lib/AST/StmtPrinter.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/AST/StmtPrinter.cpp?rev=158417&r1=158416&r2=158417&view=diff
==============================================================================
--- cfe/trunk/lib/AST/StmtPrinter.cpp (original)
+++ cfe/trunk/lib/AST/StmtPrinter.cpp Wed Jun 13 15:25:24 2012
@@ -739,93 +739,7 @@
}
void StmtPrinter::VisitStringLiteral(StringLiteral *Str) {
- switch (Str->getKind()) {
- case StringLiteral::Ascii: break; // no prefix.
- case StringLiteral::Wide: OS << 'L'; break;
- case StringLiteral::UTF8: OS << "u8"; break;
- case StringLiteral::UTF16: OS << 'u'; break;
- case StringLiteral::UTF32: OS << 'U'; break;
- }
- OS << '"';
- static const char Hex[] = "0123456789ABCDEF";
-
- unsigned LastSlashX = Str->getLength();
- for (unsigned I = 0, N = Str->getLength(); I != N; ++I) {
- switch (uint32_t Char = Str->getCodeUnit(I)) {
- default:
- // FIXME: Convert UTF-8 back to codepoints before rendering.
-
- // Convert UTF-16 surrogate pairs back to codepoints before rendering.
- // Leave invalid surrogates alone; we'll use \x for those.
- if (Str->getKind() == StringLiteral::UTF16 && I != N - 1 &&
- Char >= 0xd800 && Char <= 0xdbff) {
- uint32_t Trail = Str->getCodeUnit(I + 1);
- if (Trail >= 0xdc00 && Trail <= 0xdfff) {
- Char = 0x10000 + ((Char - 0xd800) << 10) + (Trail - 0xdc00);
- ++I;
- }
- }
-
- if (Char > 0xff) {
- // If this is a wide string, output characters over 0xff using \x
- // escapes. Otherwise, this is a UTF-16 or UTF-32 string, and Char is a
- // codepoint: use \x escapes for invalid codepoints.
- if (Str->getKind() == StringLiteral::Wide ||
- (Char >= 0xd800 && Char <= 0xdfff) || Char >= 0x110000) {
- // FIXME: Is this the best way to print wchar_t?
- OS << "\\x";
- int Shift = 28;
- while ((Char >> Shift) == 0)
- Shift -= 4;
- for (/**/; Shift >= 0; Shift -= 4)
- OS << Hex[(Char >> Shift) & 15];
- LastSlashX = I;
- break;
- }
-
- if (Char > 0xffff)
- OS << "\\U00"
- << Hex[(Char >> 20) & 15]
- << Hex[(Char >> 16) & 15];
- else
- OS << "\\u";
- OS << Hex[(Char >> 12) & 15]
- << Hex[(Char >> 8) & 15]
- << Hex[(Char >> 4) & 15]
- << Hex[(Char >> 0) & 15];
- break;
- }
-
- // If we used \x... for the previous character, and this character is a
- // hexadecimal digit, prevent it being slurped as part of the \x.
- if (LastSlashX + 1 == I) {
- switch (Char) {
- case '0': case '1': case '2': case '3': case '4':
- case '5': case '6': case '7': case '8': case '9':
- case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
- case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
- OS << "\"\"";
- }
- }
-
- if (Char <= 0xff && isprint(Char))
- OS << (char)Char;
- else // Output anything hard as an octal escape.
- OS << '\\'
- << (char)('0' + ((Char >> 6) & 7))
- << (char)('0' + ((Char >> 3) & 7))
- << (char)('0' + ((Char >> 0) & 7));
- break;
- // Handle some common non-printable cases to make dumps prettier.
- case '\\': OS << "\\\\"; break;
- case '"': OS << "\\\""; break;
- case '\n': OS << "\\n"; break;
- case '\t': OS << "\\t"; break;
- case '\a': OS << "\\a"; break;
- case '\b': OS << "\\b"; break;
- }
- }
- OS << '"';
+ Str->outputString(OS);
}
void StmtPrinter::VisitParenExpr(ParenExpr *Node) {
OS << "(";
Added: cfe/trunk/test/Misc/ast-dump-wchar.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Misc/ast-dump-wchar.cpp?rev=158417&view=auto
==============================================================================
--- cfe/trunk/test/Misc/ast-dump-wchar.cpp (added)
+++ cfe/trunk/test/Misc/ast-dump-wchar.cpp Wed Jun 13 15:25:24 2012
@@ -0,0 +1,13 @@
+// RUN: %clang_cc1 -std=c++11 -ast-dump %s 2>&1 | FileCheck %s
+
+char c8[] = u8"test\0\\\"\t\a\b\234";
+// CHECK: char c8[12] = (StringLiteral {{.*}} lvalue u8"test\000\\\"\t\a\b\234")
+
+char16_t c16[] = u"test\0\\\"\t\a\b\234\u1234";
+// CHECK: char16_t c16[13] = (StringLiteral {{.*}} lvalue u"test\000\\\"\t\a\b\234\u1234")
+
+char32_t c32[] = U"test\0\\\"\t\a\b\234\u1234\U0010ffff"; // \
+// CHECK: char32_t c32[14] = (StringLiteral {{.*}} lvalue U"test\000\\\"\t\a\b\234\u1234\U0010FFFF")
+
+wchar_t wc[] = L"test\0\\\"\t\a\b\234\u1234\xffffffff"; // \
+// CHECK: wchar_t wc[14] = (StringLiteral {{.*}} lvalue L"test\000\\\"\t\a\b\234\x1234\xFFFFFFFF")
More information about the cfe-commits
mailing list