[cfe-commits] r112672 - in /cfe/trunk: lib/Lex/LiteralSupport.cpp test/CodeGenCXX/uncode-string.cpp
Fariborz Jahanian
fjahanian at apple.com
Tue Aug 31 16:34:27 PDT 2010
Author: fjahanian
Date: Tue Aug 31 18:34:27 2010
New Revision: 112672
URL: http://llvm.org/viewvc/llvm-project?rev=112672&view=rev
Log:
Some support for Unicode string constants
in wide strings. radar 8360841.
Added:
cfe/trunk/test/CodeGenCXX/uncode-string.cpp
Modified:
cfe/trunk/lib/Lex/LiteralSupport.cpp
Modified: cfe/trunk/lib/Lex/LiteralSupport.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Lex/LiteralSupport.cpp?rev=112672&r1=112671&r2=112672&view=diff
==============================================================================
--- cfe/trunk/lib/Lex/LiteralSupport.cpp (original)
+++ cfe/trunk/lib/Lex/LiteralSupport.cpp Tue Aug 31 18:34:27 2010
@@ -170,6 +170,7 @@
static void ProcessUCNEscape(const char *&ThisTokBuf, const char *ThisTokEnd,
char *&ResultBuf, bool &HadError,
SourceLocation Loc, Preprocessor &PP,
+ bool wide,
bool Complain) {
// FIXME: Add a warning - UCN's are only valid in C++ & C99.
// FIXME: Handle wide strings.
@@ -190,6 +191,7 @@
UTF32 UcnVal = 0;
unsigned short UcnLen = (ThisTokBuf[-1] == 'u' ? 4 : 8);
+ unsigned short UcnLenSave = UcnLen;
for (; ThisTokBuf != ThisTokEnd && UcnLen; ++ThisTokBuf, UcnLen--) {
int CharVal = HexDigitValue(ThisTokBuf[0]);
if (CharVal == -1) break;
@@ -214,6 +216,16 @@
HadError = 1;
return;
}
+ if (wide) {
+ assert(UcnLenSave == 4 &&
+ "ProcessUCNEscape - only ucn length of 4 supported");
+ // little endian assumed.
+ *ResultBuf++ = (UcnVal & 0x000000FF);
+ *ResultBuf++ = (UcnVal & 0x0000FF00) >> 8;
+ *ResultBuf++ = (UcnVal & 0x00FF0000) >> 16;
+ *ResultBuf++ = (UcnVal & 0xFF000000) >> 24;
+ return;
+ }
// Now that we've parsed/checked the UCN, we convert from UTF32->UTF8.
// The conversion below was inspired by:
// http://www.unicode.org/Public/PROGRAMS/CVTUTF/ConvertUTF.c
@@ -830,12 +842,14 @@
}
const char *ThisTokEnd = ThisTokBuf+ThisTokLen-1; // Skip end quote.
-
+ bool wide = false;
// TODO: Input character set mapping support.
// Skip L marker for wide strings.
- if (ThisTokBuf[0] == 'L')
+ if (ThisTokBuf[0] == 'L') {
+ wide = true;
++ThisTokBuf;
+ }
assert(ThisTokBuf[0] == '"' && "Expected quote, lexer broken?");
++ThisTokBuf;
@@ -880,7 +894,8 @@
// Is this a Universal Character Name escape?
if (ThisTokBuf[1] == 'u' || ThisTokBuf[1] == 'U') {
ProcessUCNEscape(ThisTokBuf, ThisTokEnd, ResultPtr,
- hadError, StringToks[i].getLocation(), PP, Complain);
+ hadError, StringToks[i].getLocation(), PP, wide,
+ Complain);
continue;
}
// Otherwise, this is a non-UCN escape character. Process it.
Added: cfe/trunk/test/CodeGenCXX/uncode-string.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenCXX/uncode-string.cpp?rev=112672&view=auto
==============================================================================
--- cfe/trunk/test/CodeGenCXX/uncode-string.cpp (added)
+++ cfe/trunk/test/CodeGenCXX/uncode-string.cpp Tue Aug 31 18:34:27 2010
@@ -0,0 +1,6 @@
+// RUN: %clang_cc1 -triple x86_64-apple-darwin10 -emit-llvm -o - %s | FileCheck %s
+// rdar://8360841
+
+wchar_t s[] = L"\u2722";
+
+// CHECK: @s = global [8 x i8] c"\22'\00\00\00\00\00\00"
More information about the cfe-commits mailing list