[cfe-commits] r115743 - in /cfe/trunk: include/clang/Basic/DiagnosticLexKinds.td lib/Lex/LiteralSupport.cpp test/CodeGen/string-literal-short-wstring.c test/CodeGen/string-literal.c test/Lexer/c90.c test/Lexer/wchar.c
Nico Weber
nicolasweber at gmx.de
Tue Oct 5 21:57:26 PDT 2010
Author: nico
Date: Tue Oct 5 23:57:26 2010
New Revision: 115743
URL: http://llvm.org/viewvc/llvm-project?rev=115743&view=rev
Log:
Add support for 4-byte UCNs like \U12345678. Warn about UCNs in C90 mode.
Added:
cfe/trunk/test/CodeGen/string-literal-short-wstring.c
cfe/trunk/test/Lexer/wchar.c
Modified:
cfe/trunk/include/clang/Basic/DiagnosticLexKinds.td
cfe/trunk/lib/Lex/LiteralSupport.cpp
cfe/trunk/test/CodeGen/string-literal.c
cfe/trunk/test/Lexer/c90.c
Modified: cfe/trunk/include/clang/Basic/DiagnosticLexKinds.td
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/DiagnosticLexKinds.td?rev=115743&r1=115742&r2=115743&view=diff
==============================================================================
--- cfe/trunk/include/clang/Basic/DiagnosticLexKinds.td (original)
+++ cfe/trunk/include/clang/Basic/DiagnosticLexKinds.td Tue Oct 5 23:57:26 2010
@@ -98,6 +98,10 @@
def ext_string_too_long : Extension<"string literal of length %0 exceeds "
"maximum length %1 that %select{C90|ISO C99|C++}2 compilers are required to "
"support">, InGroup<OverlengthStrings>;
+def warn_ucn_escape_too_large : ExtWarn<
+ "character unicode escape sequence too long for its type">;
+def warn_ucn_not_valid_in_c89 : ExtWarn<
+ "unicode escape sequences are only valid in C99 or C++">;
//===----------------------------------------------------------------------===//
// PTH Diagnostics
Modified: cfe/trunk/lib/Lex/LiteralSupport.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Lex/LiteralSupport.cpp?rev=115743&r1=115742&r2=115743&view=diff
==============================================================================
--- cfe/trunk/lib/Lex/LiteralSupport.cpp (original)
+++ cfe/trunk/lib/Lex/LiteralSupport.cpp Tue Oct 5 23:57:26 2010
@@ -172,8 +172,8 @@
SourceLocation Loc, Preprocessor &PP,
bool wide,
bool Complain) {
- // FIXME: Add a warning - UCN's are only valid in C++ & C99.
- // FIXME: Handle wide strings.
+ if (!PP.getLangOptions().CPlusPlus && !PP.getLangOptions().C99)
+ PP.Diag(Loc, diag::warn_ucn_not_valid_in_c89);
// Save the beginning of the string (for error diagnostics).
const char *ThisTokBegin = ThisTokBuf;
@@ -218,13 +218,34 @@
}
if (wide) {
(void)UcnLenSave;
- assert(UcnLenSave == 4 &&
- "ProcessUCNEscape - only ucn length of 4 supported");
- // little endian assumed.
- *ResultBuf++ = (UcnVal & 0x000000FF);
- *ResultBuf++ = (UcnVal & 0x0000FF00) >> 8;
- *ResultBuf++ = (UcnVal & 0x00FF0000) >> 16;
- *ResultBuf++ = (UcnVal & 0xFF000000) >> 24;
+ assert((UcnLenSave == 4 || UcnLenSave == 8) &&
+ "ProcessUCNEscape - only ucn length of 4 or 8 supported");
+
+ if (!PP.getLangOptions().ShortWChar) {
+ // Note: our internal rep of wide char tokens is always little-endian.
+ *ResultBuf++ = (UcnVal & 0x000000FF);
+ *ResultBuf++ = (UcnVal & 0x0000FF00) >> 8;
+ *ResultBuf++ = (UcnVal & 0x00FF0000) >> 16;
+ *ResultBuf++ = (UcnVal & 0xFF000000) >> 24;
+ return;
+ }
+
+ // Convert to UTF16.
+ if (UcnVal < (UTF32)0xFFFF) {
+ *ResultBuf++ = (UcnVal & 0x000000FF);
+ *ResultBuf++ = (UcnVal & 0x0000FF00) >> 8;
+ return;
+ }
+ PP.Diag(Loc, diag::warn_ucn_escape_too_large);
+
+ typedef uint16_t UTF16;
+ UcnVal -= 0x10000;
+ UTF16 surrogate1 = 0xD800 + (UcnVal >> 10);
+ UTF16 surrogate2 = 0xDC00 + (UcnVal & 0x3FF);
+ *ResultBuf++ = (surrogate1 & 0x000000FF);
+ *ResultBuf++ = (surrogate1 & 0x0000FF00) >> 8;
+ *ResultBuf++ = (surrogate2 & 0x000000FF);
+ *ResultBuf++ = (surrogate2 & 0x0000FF00) >> 8;
return;
}
// Now that we've parsed/checked the UCN, we convert from UTF32->UTF8.
Added: cfe/trunk/test/CodeGen/string-literal-short-wstring.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/string-literal-short-wstring.c?rev=115743&view=auto
==============================================================================
--- cfe/trunk/test/CodeGen/string-literal-short-wstring.c (added)
+++ cfe/trunk/test/CodeGen/string-literal-short-wstring.c Tue Oct 5 23:57:26 2010
@@ -0,0 +1,14 @@
+// RUN: %clang_cc1 -emit-llvm -fshort-wchar %s -o - | FileCheck %s
+
+int main() {
+ // This should convert to utf8.
+ // CHECK: internal constant [10 x i8] c"\E1\84\A0\C8\A0\F4\82\80\B0\00", align 1
+ char b[10] = "\u1120\u0220\U00102030";
+
+ // CHECK: private constant [6 x i8] c"A\00B\00\00\00"
+ void *foo = L"AB";
+
+ // This should convert to utf16.
+ // CHECK: private constant [10 x i8] c" \11 \02\C8\DB0\DC\00\00"
+ void *bar = L"\u1120\u0220\U00102030";
+}
Modified: cfe/trunk/test/CodeGen/string-literal.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/string-literal.c?rev=115743&r1=115742&r2=115743&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/string-literal.c (original)
+++ cfe/trunk/test/CodeGen/string-literal.c Tue Oct 5 23:57:26 2010
@@ -1,7 +1,16 @@
-// RUN: %clang_cc1 -emit-llvm %s -o -
+// RUN: %clang_cc1 -emit-llvm %s -o - | FileCheck %s
int main() {
+ // CHECK: internal constant [10 x i8] c"abc\00\00\00\00\00\00\00", align 1
char a[10] = "abc";
+ // This should convert to utf8.
+ // CHECK: internal constant [10 x i8] c"\E1\84\A0\C8\A0\F4\82\80\B0\00", align 1
+ char b[10] = "\u1120\u0220\U00102030";
+
+ // CHECK: private constant [12 x i8] c"A\00\00\00B\00\00\00\00\00\00\00"
void *foo = L"AB";
+
+ // CHECK: private constant [12 x i8] c"4\12\00\00\0B\F0\10\00\00\00\00\00"
+ void *bar = L"\u1234\U0010F00B";
}
Modified: cfe/trunk/test/Lexer/c90.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Lexer/c90.c?rev=115743&r1=115742&r2=115743&view=diff
==============================================================================
--- cfe/trunk/test/Lexer/c90.c (original)
+++ cfe/trunk/test/Lexer/c90.c Tue Oct 5 23:57:26 2010
@@ -27,3 +27,7 @@
"sdjflksdjf lksdjf skldfjsdkljflksdjf kldsjflkdsj fldks jflsdkjfds"
"sdjflksdjf lksdjf skldfjsdkljflksdjf kldsjflkdsj fldks jflsdkjfds";
}
+
+void test3() {
+ (void)L"\u1234"; // expected-error {{unicode escape sequences are only valid in C99 or C++}}
+}
Added: cfe/trunk/test/Lexer/wchar.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Lexer/wchar.c?rev=115743&view=auto
==============================================================================
--- cfe/trunk/test/Lexer/wchar.c (added)
+++ cfe/trunk/test/Lexer/wchar.c Tue Oct 5 23:57:26 2010
@@ -0,0 +1,6 @@
+// RUN: %clang_cc1 -fsyntax-only -fshort-wchar -verify %s
+
+void f() {
+ (void)L"\U00010000"; // expected-warning {{character unicode escape sequence too long for its type}}
+}
+
More information about the cfe-commits mailing list