[cfe-commits] r136210 - in /cfe/trunk: include/clang/AST/ include/clang/Basic/ include/clang/Lex/ include/clang/Parse/ lib/AST/ lib/CodeGen/ lib/Lex/ lib/Parse/ lib/Rewrite/ lib/Sema/ lib/Serialization/ test/CXX/lex/lex.literal/lex.ccon/ test/CodeGen/ test/Lexer/ test/Parser/ test/SemaCXX/
Nico Weber
thakis at chromium.org
Wed Jul 27 07:17:02 PDT 2011
On Tue, Jul 26, 2011 at 10:40 PM, Douglas Gregor <dgregor at apple.com> wrote:
> Author: dgregor
> Date: Wed Jul 27 00:40:30 2011
> New Revision: 136210
>
> URL: http://llvm.org/viewvc/llvm-project?rev=136210&view=rev
> Log:
> Add support for C++0x unicode string and character literals, from Craig Topper!
Cool! One question below.
>
> Modified:
> cfe/trunk/include/clang/AST/Expr.h
> cfe/trunk/include/clang/AST/Type.h
> cfe/trunk/include/clang/Basic/DiagnosticLexKinds.td
> cfe/trunk/include/clang/Basic/IdentifierTable.h
> cfe/trunk/include/clang/Basic/TokenKinds.def
> cfe/trunk/include/clang/Lex/Lexer.h
> cfe/trunk/include/clang/Lex/LiteralSupport.h
> cfe/trunk/include/clang/Lex/Token.h
> cfe/trunk/include/clang/Lex/TokenConcatenation.h
> cfe/trunk/include/clang/Parse/Parser.h
> cfe/trunk/lib/AST/ASTImporter.cpp
> cfe/trunk/lib/AST/Expr.cpp
> cfe/trunk/lib/AST/StmtDumper.cpp
> cfe/trunk/lib/AST/StmtPrinter.cpp
> cfe/trunk/lib/AST/StmtProfile.cpp
> cfe/trunk/lib/AST/Type.cpp
> cfe/trunk/lib/CodeGen/CodeGenModule.cpp
> cfe/trunk/lib/Lex/Lexer.cpp
> cfe/trunk/lib/Lex/LiteralSupport.cpp
> cfe/trunk/lib/Lex/MacroArgs.cpp
> cfe/trunk/lib/Lex/PPDirectives.cpp
> cfe/trunk/lib/Lex/PPExpressions.cpp
> cfe/trunk/lib/Lex/Pragma.cpp
> cfe/trunk/lib/Lex/TokenConcatenation.cpp
> cfe/trunk/lib/Parse/ParseCXXInlineMethods.cpp
> cfe/trunk/lib/Parse/ParseExpr.cpp
> cfe/trunk/lib/Parse/ParseTentative.cpp
> cfe/trunk/lib/Parse/Parser.cpp
> cfe/trunk/lib/Rewrite/HTMLRewrite.cpp
> cfe/trunk/lib/Rewrite/RewriteObjC.cpp
> cfe/trunk/lib/Sema/SemaChecking.cpp
> cfe/trunk/lib/Sema/SemaDeclAttr.cpp
> cfe/trunk/lib/Sema/SemaExpr.cpp
> cfe/trunk/lib/Sema/SemaExprCXX.cpp
> cfe/trunk/lib/Sema/SemaExprObjC.cpp
> cfe/trunk/lib/Sema/SemaInit.cpp
> cfe/trunk/lib/Sema/SemaStmt.cpp
> cfe/trunk/lib/Sema/SemaTemplate.cpp
> cfe/trunk/lib/Serialization/ASTReaderStmt.cpp
> cfe/trunk/lib/Serialization/ASTWriterStmt.cpp
> cfe/trunk/test/CXX/lex/lex.literal/lex.ccon/p1.cpp
> cfe/trunk/test/CodeGen/char-literal.c
> cfe/trunk/test/CodeGen/string-literal.c
> cfe/trunk/test/Lexer/wchar.c
> cfe/trunk/test/Parser/char-literal-printing.c
> cfe/trunk/test/SemaCXX/type-convert-construct.cpp
>
> Modified: cfe/trunk/include/clang/AST/Expr.h
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/AST/Expr.h?rev=136210&r1=136209&r2=136210&view=diff
> ==============================================================================
> --- cfe/trunk/include/clang/AST/Expr.h (original)
> +++ cfe/trunk/include/clang/AST/Expr.h Wed Jul 27 00:40:30 2011
> @@ -1112,29 +1112,39 @@
> };
>
> class CharacterLiteral : public Expr {
> +public:
> + enum CharacterKind {
> + Ascii,
> + Wide,
> + UTF16,
> + UTF32
> + };
> +
> +private:
> unsigned Value;
> SourceLocation Loc;
> - bool IsWide;
> + unsigned Kind : 2;
> public:
> // type should be IntTy
> - CharacterLiteral(unsigned value, bool iswide, QualType type, SourceLocation l)
> + CharacterLiteral(unsigned value, CharacterKind kind, QualType type,
> + SourceLocation l)
> : Expr(CharacterLiteralClass, type, VK_RValue, OK_Ordinary, false, false,
> false, false),
> - Value(value), Loc(l), IsWide(iswide) {
> + Value(value), Loc(l), Kind(kind) {
> }
>
> /// \brief Construct an empty character literal.
> CharacterLiteral(EmptyShell Empty) : Expr(CharacterLiteralClass, Empty) { }
>
> SourceLocation getLocation() const { return Loc; }
> - bool isWide() const { return IsWide; }
> + CharacterKind getKind() const { return static_cast<CharacterKind>(Kind); }
>
> SourceRange getSourceRange() const { return SourceRange(Loc); }
>
> unsigned getValue() const { return Value; }
>
> void setLocation(SourceLocation Location) { Loc = Location; }
> - void setWide(bool W) { IsWide = W; }
> + void setKind(CharacterKind kind) { Kind = kind; }
> void setValue(unsigned Val) { Value = Val; }
>
> static bool classof(const Stmt *T) {
> @@ -1243,13 +1253,23 @@
> /// In this case, getByteLength() will return 6, but the string literal will
> /// have type "char[2]".
> class StringLiteral : public Expr {
> +public:
> + enum StringKind {
> + Ascii,
> + Wide,
> + UTF8,
> + UTF16,
> + UTF32
> + };
> +
> +private:
> friend class ASTStmtReader;
>
> const char *StrData;
> unsigned ByteLength;
> - bool IsWide;
> - bool IsPascal;
> unsigned NumConcatenated;
> + unsigned Kind : 3;
> + bool IsPascal : 1;
> SourceLocation TokLocs[1];
>
> StringLiteral(QualType Ty) :
> @@ -1259,14 +1279,15 @@
> public:
> /// This is the "fully general" constructor that allows representation of
> /// strings formed from multiple concatenated tokens.
> - static StringLiteral *Create(ASTContext &C, StringRef Str, bool Wide,
> + static StringLiteral *Create(ASTContext &C, StringRef Str, StringKind Kind,
> bool Pascal, QualType Ty,
> const SourceLocation *Loc, unsigned NumStrs);
>
> /// Simple constructor for string literals made from one token.
> - static StringLiteral *Create(ASTContext &C, StringRef Str, bool Wide,
> - bool Pascal, QualType Ty, SourceLocation Loc) {
> - return Create(C, Str, Wide, Pascal, Ty, &Loc, 1);
> + static StringLiteral *Create(ASTContext &C, StringRef Str, StringKind Kind,
> + bool Pascal, QualType Ty,
> + SourceLocation Loc) {
> + return Create(C, Str, Kind, Pascal, Ty, &Loc, 1);
> }
>
> /// \brief Construct an empty string literal.
> @@ -1281,9 +1302,14 @@
> /// \brief Sets the string data to the given string data.
> void setString(ASTContext &C, StringRef Str);
>
> - bool isWide() const { return IsWide; }
> + StringKind getKind() const { return static_cast<StringKind>(Kind); }
> + bool isAscii() const { return Kind == Ascii; }
> + bool isWide() const { return Kind == Wide; }
> + bool isUTF8() const { return Kind == UTF8; }
> + bool isUTF16() const { return Kind == UTF16; }
> + bool isUTF32() const { return Kind == UTF32; }
> bool isPascal() const { return IsPascal; }
> -
> +
> bool containsNonAsciiOrNull() const {
> StringRef Str = getString();
> for (unsigned i = 0, e = Str.size(); i != e; ++i)
>
> Modified: cfe/trunk/include/clang/AST/Type.h
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/AST/Type.h?rev=136210&r1=136209&r2=136210&view=diff
> ==============================================================================
> --- cfe/trunk/include/clang/AST/Type.h (original)
> +++ cfe/trunk/include/clang/AST/Type.h Wed Jul 27 00:40:30 2011
> @@ -1368,6 +1368,8 @@
> bool isBooleanType() const;
> bool isCharType() const;
> bool isWideCharType() const;
> + bool isChar16Type() const;
> + bool isChar32Type() const;
> bool isAnyCharacterType() const;
> bool isIntegralType(ASTContext &Ctx) const;
>
>
> Modified: cfe/trunk/include/clang/Basic/DiagnosticLexKinds.td
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/DiagnosticLexKinds.td?rev=136210&r1=136209&r2=136210&view=diff
> ==============================================================================
> --- cfe/trunk/include/clang/Basic/DiagnosticLexKinds.td (original)
> +++ cfe/trunk/include/clang/Basic/DiagnosticLexKinds.td Wed Jul 27 00:40:30 2011
> @@ -77,8 +77,8 @@
> "invalid suffix '%0' on integer constant">;
> def err_invalid_suffix_float_constant : Error<
> "invalid suffix '%0' on floating constant">;
> -def warn_extraneous_wide_char_constant : Warning<
> - "extraneous characters in wide character constant ignored">;
> +def warn_extraneous_char_constant : Warning<
> + "extraneous characters in character constant ignored">;
> def warn_char_constant_too_large : Warning<
> "character constant too long for its type">;
> def err_exponent_has_no_digits : Error<"exponent has no digits">;
> @@ -102,6 +102,8 @@
> "character unicode escape sequence too long for its type">;
> def warn_ucn_not_valid_in_c89 : ExtWarn<
> "unicode escape sequences are only valid in C99 or C++">;
> +def err_unsupported_string_concat : Error<
> + "unsupported non-standard concatenation of string literals">;
>
> //===----------------------------------------------------------------------===//
> // PTH Diagnostics
>
> Modified: cfe/trunk/include/clang/Basic/IdentifierTable.h
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/IdentifierTable.h?rev=136210&r1=136209&r2=136210&view=diff
> ==============================================================================
> --- cfe/trunk/include/clang/Basic/IdentifierTable.h (original)
> +++ cfe/trunk/include/clang/Basic/IdentifierTable.h Wed Jul 27 00:40:30 2011
> @@ -50,8 +50,8 @@
> /// set, and all tok::identifier tokens have a pointer to one of these.
> class IdentifierInfo {
> // Note: DON'T make TokenID a 'tok::TokenKind'; MSVC will treat it as a
> - // signed char and TokenKinds > 127 won't be handled correctly.
> - unsigned TokenID : 8; // Front-end token ID or tok::identifier.
> + // signed char and TokenKinds > 255 won't be handled correctly.
> + unsigned TokenID : 9; // Front-end token ID or tok::identifier.
> // Objective-C keyword ('protocol' in '@protocol') or builtin (__builtin_inf).
> // First NUM_OBJC_KEYWORDS values are for Objective-C, the remaining values
> // are for builtins.
> @@ -65,7 +65,7 @@
> // file and wasn't modified since.
> bool RevertedTokenID : 1; // True if RevertTokenIDToIdentifier was
> // called.
> - // 6 bits left in 32-bit word.
> + // 5 bits left in 32-bit word.
> void *FETokenInfo; // Managed by the language front-end.
> llvm::StringMapEntry<IdentifierInfo*> *Entry;
>
> @@ -409,6 +409,7 @@
> IdentifierInfo &get(StringRef Name, tok::TokenKind TokenCode) {
> IdentifierInfo &II = get(Name);
> II.TokenID = TokenCode;
> + assert(II.TokenID == TokenCode && "TokenCode too large");
> return II;
> }
>
>
> Modified: cfe/trunk/include/clang/Basic/TokenKinds.def
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/TokenKinds.def?rev=136210&r1=136209&r2=136210&view=diff
> ==============================================================================
> --- cfe/trunk/include/clang/Basic/TokenKinds.def (original)
> +++ cfe/trunk/include/clang/Basic/TokenKinds.def Wed Jul 27 00:40:30 2011
> @@ -114,13 +114,23 @@
> TOK(numeric_constant) // 0x123
>
> // C99 6.4.4: Character Constants
> -TOK(char_constant) // 'a' L'b'
> +TOK(char_constant) // 'a'
> +TOK(wide_char_constant) // L'b'
> +
> +// C++0x Character Constants
> +TOK(utf16_char_constant) // u'a'
> +TOK(utf32_char_constant) // U'a'
>
> // C99 6.4.5: String Literals.
> TOK(string_literal) // "foo"
> TOK(wide_string_literal) // L"foo"
> TOK(angle_string_literal)// <foo>
>
> +// C++0x String Literals.
> +TOK(utf8_string_literal) // u8"foo"
> +TOK(utf16_string_literal)// u"foo"
> +TOK(utf32_string_literal)// U"foo"
> +
> // C99 6.4.6: Punctuators.
> PUNCTUATOR(l_square, "[")
> PUNCTUATOR(r_square, "]")
>
> Modified: cfe/trunk/include/clang/Lex/Lexer.h
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Lex/Lexer.h?rev=136210&r1=136209&r2=136210&view=diff
> ==============================================================================
> --- cfe/trunk/include/clang/Lex/Lexer.h (original)
> +++ cfe/trunk/include/clang/Lex/Lexer.h Wed Jul 27 00:40:30 2011
> @@ -471,9 +471,11 @@
> // Helper functions to lex the remainder of a token of the specific type.
> void LexIdentifier (Token &Result, const char *CurPtr);
> void LexNumericConstant (Token &Result, const char *CurPtr);
> - void LexStringLiteral (Token &Result, const char *CurPtr,bool Wide);
> + void LexStringLiteral (Token &Result, const char *CurPtr,
> + tok::TokenKind Kind);
> void LexAngledStringLiteral(Token &Result, const char *CurPtr);
> - void LexCharConstant (Token &Result, const char *CurPtr);
> + void LexCharConstant (Token &Result, const char *CurPtr,
> + tok::TokenKind Kind);
> bool LexEndOfFile (Token &Result, const char *CurPtr);
>
> bool SkipWhitespace (Token &Result, const char *CurPtr);
>
> Modified: cfe/trunk/include/clang/Lex/LiteralSupport.h
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Lex/LiteralSupport.h?rev=136210&r1=136209&r2=136210&view=diff
> ==============================================================================
> --- cfe/trunk/include/clang/Lex/LiteralSupport.h (original)
> +++ cfe/trunk/include/clang/Lex/LiteralSupport.h Wed Jul 27 00:40:30 2011
> @@ -19,6 +19,7 @@
> #include "llvm/ADT/APFloat.h"
> #include "llvm/ADT/SmallString.h"
> #include "llvm/Support/DataTypes.h"
> +#include "clang/Basic/TokenKinds.h"
> #include <cctype>
>
> namespace clang {
> @@ -124,15 +125,19 @@
> /// character literal.
> class CharLiteralParser {
> uint64_t Value;
> - bool IsWide;
> + tok::TokenKind Kind;
> bool IsMultiChar;
> bool HadError;
> public:
> CharLiteralParser(const char *begin, const char *end,
> - SourceLocation Loc, Preprocessor &PP);
> + SourceLocation Loc, Preprocessor &PP,
> + tok::TokenKind kind);
>
> bool hadError() const { return HadError; }
> - bool isWide() const { return IsWide; }
> + bool isAscii() const { return Kind == tok::char_constant; }
> + bool isWide() const { return Kind == tok::wide_char_constant; }
> + bool isUTF16() const { return Kind == tok::utf16_char_constant; }
> + bool isUTF32() const { return Kind == tok::utf32_char_constant; }
> bool isMultiChar() const { return IsMultiChar; }
> uint64_t getValue() const { return Value; }
> };
> @@ -148,7 +153,8 @@
>
> unsigned MaxTokenLength;
> unsigned SizeBound;
> - unsigned wchar_tByteWidth;
> + unsigned CharByteWidth;
> + tok::TokenKind Kind;
> llvm::SmallString<512> ResultBuf;
> char *ResultPtr; // cursor
> public:
> @@ -158,14 +164,13 @@
> const SourceManager &sm, const LangOptions &features,
> const TargetInfo &target, Diagnostic *diags = 0)
> : SM(sm), Features(features), Target(target), Diags(diags),
> - MaxTokenLength(0), SizeBound(0), wchar_tByteWidth(0),
> - ResultPtr(ResultBuf.data()), hadError(false), AnyWide(false), Pascal(false) {
> + MaxTokenLength(0), SizeBound(0), CharByteWidth(0), Kind(tok::unknown),
> + ResultPtr(ResultBuf.data()), hadError(false), Pascal(false) {
> init(StringToks, NumStringToks);
> }
>
>
> bool hadError;
> - bool AnyWide;
> bool Pascal;
>
> StringRef GetString() const {
> @@ -174,9 +179,7 @@
> unsigned GetStringLength() const { return ResultPtr-ResultBuf.data(); }
>
> unsigned GetNumStringChars() const {
> - if (AnyWide)
> - return GetStringLength() / wchar_tByteWidth;
> - return GetStringLength();
> + return GetStringLength() / CharByteWidth;
> }
> /// getOffsetOfStringByte - This function returns the offset of the
> /// specified byte of the string data represented by Token. This handles
> @@ -185,7 +188,13 @@
> /// If the Diagnostics pointer is non-null, then this will do semantic
> /// checking of the string literal and emit errors and warnings.
> unsigned getOffsetOfStringByte(const Token &TheTok, unsigned ByteNo) const;
> -
> +
> + bool isAscii() { return Kind == tok::string_literal; }
> + bool isWide() { return Kind == tok::wide_string_literal; }
> + bool isUTF8() { return Kind == tok::utf8_string_literal; }
> + bool isUTF16() { return Kind == tok::utf16_string_literal; }
> + bool isUTF32() { return Kind == tok::utf32_string_literal; }
> +
> private:
> void init(const Token *StringToks, unsigned NumStringToks);
> };
>
> Modified: cfe/trunk/include/clang/Lex/Token.h
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Lex/Token.h?rev=136210&r1=136209&r2=136210&view=diff
> ==============================================================================
> --- cfe/trunk/include/clang/Lex/Token.h (original)
> +++ cfe/trunk/include/clang/Lex/Token.h Wed Jul 27 00:40:30 2011
> @@ -96,7 +96,10 @@
> /// constant, string, etc.
> bool isLiteral() const {
> return is(tok::numeric_constant) || is(tok::char_constant) ||
> - is(tok::string_literal) || is(tok::wide_string_literal) ||
> + is(tok::wide_char_constant) || is(tok::utf16_char_constant) ||
> + is(tok::utf32_char_constant) || is(tok::string_literal) ||
> + is(tok::wide_string_literal) || is(tok::utf8_string_literal) ||
> + is(tok::utf16_string_literal) || is(tok::utf32_string_literal) ||
> is(tok::angle_string_literal);
> }
>
>
> Modified: cfe/trunk/include/clang/Lex/TokenConcatenation.h
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Lex/TokenConcatenation.h?rev=136210&r1=136209&r2=136210&view=diff
> ==============================================================================
> --- cfe/trunk/include/clang/Lex/TokenConcatenation.h (original)
> +++ cfe/trunk/include/clang/Lex/TokenConcatenation.h Wed Jul 27 00:40:30 2011
> @@ -63,12 +63,9 @@
> const Token &Tok) const;
>
> private:
> - /// StartsWithL - Return true if the spelling of this token starts with 'L'.
> - bool StartsWithL(const Token &Tok) const;
> -
> - /// IsIdentifierL - Return true if the spelling of this token is literally
> - /// 'L'.
> - bool IsIdentifierL(const Token &Tok) const;
> + /// IsIdentifierStringPrefix - Return true if the spelling of the token
> + /// is literally 'L', 'u', 'U', or 'u8'.
> + bool IsIdentifierStringPrefix(const Token &Tok) const;
> };
> } // end clang namespace
>
>
> Modified: cfe/trunk/include/clang/Parse/Parser.h
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Parse/Parser.h?rev=136210&r1=136209&r2=136210&view=diff
> ==============================================================================
> --- cfe/trunk/include/clang/Parse/Parser.h (original)
> +++ cfe/trunk/include/clang/Parse/Parser.h Wed Jul 27 00:40:30 2011
> @@ -265,7 +265,10 @@
> ///
> bool isTokenStringLiteral() const {
> return Tok.getKind() == tok::string_literal ||
> - Tok.getKind() == tok::wide_string_literal;
> + Tok.getKind() == tok::wide_string_literal ||
> + Tok.getKind() == tok::utf8_string_literal ||
> + Tok.getKind() == tok::utf16_string_literal ||
> + Tok.getKind() == tok::utf32_string_literal;
> }
>
> /// \brief Returns true if the current token is a '=' or '==' and
>
> Modified: cfe/trunk/lib/AST/ASTImporter.cpp
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/AST/ASTImporter.cpp?rev=136210&r1=136209&r2=136210&view=diff
> ==============================================================================
> --- cfe/trunk/lib/AST/ASTImporter.cpp (original)
> +++ cfe/trunk/lib/AST/ASTImporter.cpp Wed Jul 27 00:40:30 2011
> @@ -3814,8 +3814,8 @@
> if (T.isNull())
> return 0;
>
> - return new (Importer.getToContext()) CharacterLiteral(E->getValue(),
> - E->isWide(), T,
> + return new (Importer.getToContext()) CharacterLiteral(E->getValue(),
> + E->getKind(), T,
> Importer.Import(E->getLocation()));
> }
>
>
> Modified: cfe/trunk/lib/AST/Expr.cpp
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/AST/Expr.cpp?rev=136210&r1=136209&r2=136210&view=diff
> ==============================================================================
> --- cfe/trunk/lib/AST/Expr.cpp (original)
> +++ cfe/trunk/lib/AST/Expr.cpp Wed Jul 27 00:40:30 2011
> @@ -533,8 +533,7 @@
> }
>
> StringLiteral *StringLiteral::Create(ASTContext &C, StringRef Str,
> - bool Wide,
> - bool Pascal, QualType Ty,
> + StringKind Kind, bool Pascal, QualType Ty,
> const SourceLocation *Loc,
> unsigned NumStrs) {
> // Allocate enough space for the StringLiteral plus an array of locations for
> @@ -549,7 +548,7 @@
> memcpy(AStrData, Str.data(), Str.size());
> SL->StrData = AStrData;
> SL->ByteLength = Str.size();
> - SL->IsWide = Wide;
> + SL->Kind = Kind;
> SL->IsPascal = Pascal;
> SL->TokLocs[0] = Loc[0];
> SL->NumConcatenated = NumStrs;
> @@ -587,8 +586,8 @@
> SourceLocation StringLiteral::
> getLocationOfByte(unsigned ByteNo, const SourceManager &SM,
> const LangOptions &Features, const TargetInfo &Target) const {
> - assert(!isWide() && "This doesn't work for wide strings yet");
> -
> + assert(Kind == StringLiteral::Ascii && "This only works for ASCII strings");
> +
> // Loop over all of the tokens in this string until we find the one that
> // contains the byte we're looking for.
> unsigned TokNo = 0;
>
> Modified: cfe/trunk/lib/AST/StmtDumper.cpp
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/AST/StmtDumper.cpp?rev=136210&r1=136209&r2=136210&view=diff
> ==============================================================================
> --- cfe/trunk/lib/AST/StmtDumper.cpp (original)
> +++ cfe/trunk/lib/AST/StmtDumper.cpp Wed Jul 27 00:40:30 2011
> @@ -443,8 +443,13 @@
> DumpExpr(Str);
> // FIXME: this doesn't print wstrings right.
> OS << " ";
> - if (Str->isWide())
> - OS << "L";
> + switch (Str->getKind()) {
> + case StringLiteral::Ascii: break; // No prefix
> + case StringLiteral::Wide: OS << 'L'; break;
> + case StringLiteral::UTF8: OS << "u8"; break;
> + case StringLiteral::UTF16: OS << 'u'; break;
> + case StringLiteral::UTF32: OS << 'U'; break;
> + }
> OS << '"';
> OS.write_escaped(Str->getString());
> OS << '"';
>
> Modified: cfe/trunk/lib/AST/StmtPrinter.cpp
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/AST/StmtPrinter.cpp?rev=136210&r1=136209&r2=136210&view=diff
> ==============================================================================
> --- cfe/trunk/lib/AST/StmtPrinter.cpp (original)
> +++ cfe/trunk/lib/AST/StmtPrinter.cpp Wed Jul 27 00:40:30 2011
> @@ -599,8 +599,14 @@
>
> void StmtPrinter::VisitCharacterLiteral(CharacterLiteral *Node) {
> unsigned value = Node->getValue();
> - if (Node->isWide())
> - OS << "L";
> +
> + switch (Node->getKind()) {
> + case CharacterLiteral::Ascii: break; // no prefix.
> + case CharacterLiteral::Wide: OS << 'L'; break;
> + case CharacterLiteral::UTF16: OS << 'u'; break;
> + case CharacterLiteral::UTF32: OS << 'U'; break;
> + }
> +
> switch (value) {
> case '\\':
> OS << "'\\\\'";
> @@ -672,7 +678,13 @@
> }
>
> void StmtPrinter::VisitStringLiteral(StringLiteral *Str) {
> - if (Str->isWide()) OS << 'L';
> + switch (Str->getKind()) {
> + case StringLiteral::Ascii: break; // no prefix.
> + case StringLiteral::Wide: OS << 'L'; break;
> + case StringLiteral::UTF8: OS << "u8"; break;
> + case StringLiteral::UTF16: OS << 'u'; break;
> + case StringLiteral::UTF32: OS << 'U'; break;
> + }
> OS << '"';
>
> // FIXME: this doesn't print wstrings right.
>
> Modified: cfe/trunk/lib/AST/StmtProfile.cpp
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/AST/StmtProfile.cpp?rev=136210&r1=136209&r2=136210&view=diff
> ==============================================================================
> --- cfe/trunk/lib/AST/StmtProfile.cpp (original)
> +++ cfe/trunk/lib/AST/StmtProfile.cpp Wed Jul 27 00:40:30 2011
> @@ -252,7 +252,7 @@
>
> void StmtProfiler::VisitCharacterLiteral(const CharacterLiteral *S) {
> VisitExpr(S);
> - ID.AddBoolean(S->isWide());
> + ID.AddInteger(S->getKind());
> ID.AddInteger(S->getValue());
> }
>
> @@ -269,7 +269,7 @@
> void StmtProfiler::VisitStringLiteral(const StringLiteral *S) {
> VisitExpr(S);
> ID.AddString(S->getString());
> - ID.AddBoolean(S->isWide());
> + ID.AddInteger(S->getKind());
> }
>
> void StmtProfiler::VisitParenExpr(const ParenExpr *S) {
>
> Modified: cfe/trunk/lib/AST/Type.cpp
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/AST/Type.cpp?rev=136210&r1=136209&r2=136210&view=diff
> ==============================================================================
> --- cfe/trunk/lib/AST/Type.cpp (original)
> +++ cfe/trunk/lib/AST/Type.cpp Wed Jul 27 00:40:30 2011
> @@ -635,6 +635,18 @@
> return false;
> }
>
> +bool Type::isChar16Type() const {
> + if (const BuiltinType *BT = dyn_cast<BuiltinType>(CanonicalType))
> + return BT->getKind() == BuiltinType::Char16;
> + return false;
> +}
> +
> +bool Type::isChar32Type() const {
> + if (const BuiltinType *BT = dyn_cast<BuiltinType>(CanonicalType))
> + return BT->getKind() == BuiltinType::Char32;
> + return false;
> +}
> +
> /// \brief Determine whether this type is any of the built-in character
> /// types.
> bool Type::isAnyCharacterType() const {
>
> Modified: cfe/trunk/lib/CodeGen/CodeGenModule.cpp
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CodeGenModule.cpp?rev=136210&r1=136209&r2=136210&view=diff
> ==============================================================================
> --- cfe/trunk/lib/CodeGen/CodeGenModule.cpp (original)
> +++ cfe/trunk/lib/CodeGen/CodeGenModule.cpp Wed Jul 27 00:40:30 2011
> @@ -1877,8 +1877,20 @@
> // Resize the string to the right size.
> uint64_t RealLen = CAT->getSize().getZExtValue();
>
> - if (E->isWide())
> + switch (E->getKind()) {
> + case StringLiteral::Ascii:
> + case StringLiteral::UTF8:
> + break;
> + case StringLiteral::Wide:
> RealLen *= Context.Target.getWCharWidth() / Context.getCharWidth();
> + break;
> + case StringLiteral::UTF16:
> + RealLen *= Context.Target.getChar16Width() / Context.getCharWidth();
> + break;
> + case StringLiteral::UTF32:
> + RealLen *= Context.Target.getChar32Width() / Context.getCharWidth();
> + break;
> + }
>
> std::string Str = E->getString().str();
> Str.resize(RealLen, '\0');
> @@ -1893,7 +1905,7 @@
> // FIXME: This can be more efficient.
> // FIXME: We shouldn't need to bitcast the constant in the wide string case.
> llvm::Constant *C = GetAddrOfConstantString(GetStringForStringLiteral(S));
> - if (S->isWide()) {
> + if (S->isWide() || S->isUTF16() || S->isUTF32()) {
> llvm::Type *DestTy =
> llvm::PointerType::getUnqual(getTypes().ConvertType(S->getType()));
> C = llvm::ConstantExpr::getBitCast(C, DestTy);
>
> Modified: cfe/trunk/lib/Lex/Lexer.cpp
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Lex/Lexer.cpp?rev=136210&r1=136209&r2=136210&view=diff
> ==============================================================================
> --- cfe/trunk/lib/Lex/Lexer.cpp (original)
> +++ cfe/trunk/lib/Lex/Lexer.cpp Wed Jul 27 00:40:30 2011
> @@ -1267,8 +1267,9 @@
> }
>
> /// LexStringLiteral - Lex the remainder of a string literal, after having lexed
> -/// either " or L".
> -void Lexer::LexStringLiteral(Token &Result, const char *CurPtr, bool Wide) {
> +/// either " or L" or u8" or u" or U".
> +void Lexer::LexStringLiteral(Token &Result, const char *CurPtr,
> + tok::TokenKind Kind) {
> const char *NulCharacter = 0; // Does this string contain the \0 character?
>
> char C = getAndAdvanceChar(CurPtr, Result);
> @@ -1299,8 +1300,7 @@
>
> // Update the location of the token as well as the BufferPtr instance var.
> const char *TokStart = BufferPtr;
> - FormTokenWithChars(Result, CurPtr,
> - Wide ? tok::wide_string_literal : tok::string_literal);
> + FormTokenWithChars(Result, CurPtr, Kind);
> Result.setLiteralData(TokStart);
> }
>
> @@ -1339,8 +1339,9 @@
>
>
> /// LexCharConstant - Lex the remainder of a character constant, after having
> -/// lexed either ' or L'.
> -void Lexer::LexCharConstant(Token &Result, const char *CurPtr) {
> +/// lexed either ' or L' or u' or U'.
> +void Lexer::LexCharConstant(Token &Result, const char *CurPtr,
> + tok::TokenKind Kind) {
> const char *NulCharacter = 0; // Does this character contain the \0 character?
>
> char C = getAndAdvanceChar(CurPtr, Result);
> @@ -1377,7 +1378,7 @@
>
> // Update the location of token as well as BufferPtr.
> const char *TokStart = BufferPtr;
> - FormTokenWithChars(Result, CurPtr, tok::char_constant);
> + FormTokenWithChars(Result, CurPtr, Kind);
> Result.setLiteralData(TokStart);
> }
>
> @@ -2185,6 +2186,55 @@
> MIOpt.ReadToken();
> return LexNumericConstant(Result, CurPtr);
>
> + case 'u': // Identifier (uber) or C++0x UTF-8 or UTF-16 string literal
> + // Notify MIOpt that we read a non-whitespace/non-comment token.
> + MIOpt.ReadToken();
> +
> + if (Features.CPlusPlus0x) {
> + Char = getCharAndSize(CurPtr, SizeTmp);
> +
> + // UTF-16 string literal
> + if (Char == '"')
> + return LexStringLiteral(Result, ConsumeChar(CurPtr, SizeTmp, Result),
> + tok::utf16_string_literal);
> +
> + // UTF-16 character constant
> + if (Char == '\'')
> + return LexCharConstant(Result, ConsumeChar(CurPtr, SizeTmp, Result),
> + tok::utf16_char_constant);
> +
> + // UTF-8 string literal
> + if (Char == '8' && getCharAndSize(CurPtr + SizeTmp, SizeTmp2) == '"')
> + return LexStringLiteral(Result,
> + ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
> + SizeTmp2, Result),
> + tok::utf8_string_literal);
> + }
> +
> + // treat u like the start of an identifier.
> + return LexIdentifier(Result, CurPtr);
> +
> + case 'U': // Identifier (Uber) or C++0x UTF-32 string literal
> + // Notify MIOpt that we read a non-whitespace/non-comment token.
> + MIOpt.ReadToken();
> +
> + if (Features.CPlusPlus0x) {
> + Char = getCharAndSize(CurPtr, SizeTmp);
> +
> + // UTF-32 string literal
> + if (Char == '"')
> + return LexStringLiteral(Result, ConsumeChar(CurPtr, SizeTmp, Result),
> + tok::utf32_string_literal);
> +
> + // UTF-32 character constant
> + if (Char == '\'')
> + return LexCharConstant(Result, ConsumeChar(CurPtr, SizeTmp, Result),
> + tok::utf32_char_constant);
> + }
> +
> + // treat U like the start of an identifier.
> + return LexIdentifier(Result, CurPtr);
> +
> case 'L': // Identifier (Loony) or wide literal (L'x' or L"xyz").
> // Notify MIOpt that we read a non-whitespace/non-comment token.
> MIOpt.ReadToken();
> @@ -2193,21 +2243,22 @@
> // Wide string literal.
> if (Char == '"')
> return LexStringLiteral(Result, ConsumeChar(CurPtr, SizeTmp, Result),
> - true);
> + tok::wide_string_literal);
>
> // Wide character constant.
> if (Char == '\'')
> - return LexCharConstant(Result, ConsumeChar(CurPtr, SizeTmp, Result));
> + return LexCharConstant(Result, ConsumeChar(CurPtr, SizeTmp, Result),
> + tok::wide_char_constant);
> // FALL THROUGH, treating L like the start of an identifier.
>
> // C99 6.4.2: Identifiers.
> case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
> case 'H': case 'I': case 'J': case 'K': /*'L'*/case 'M': case 'N':
> - case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':
> + case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': /*'U'*/
> case 'V': case 'W': case 'X': case 'Y': case 'Z':
> case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':
> case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
> - case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u':
> + case 'o': case 'p': case 'q': case 'r': case 's': case 't': /*'u'*/
> case 'v': case 'w': case 'x': case 'y': case 'z':
> case '_':
> // Notify MIOpt that we read a non-whitespace/non-comment token.
> @@ -2230,13 +2281,13 @@
> case '\'':
> // Notify MIOpt that we read a non-whitespace/non-comment token.
> MIOpt.ReadToken();
> - return LexCharConstant(Result, CurPtr);
> + return LexCharConstant(Result, CurPtr, tok::char_constant);
>
> // C99 6.4.5: String Literals.
> case '"':
> // Notify MIOpt that we read a non-whitespace/non-comment token.
> MIOpt.ReadToken();
> - return LexStringLiteral(Result, CurPtr, false);
> + return LexStringLiteral(Result, CurPtr, tok::string_literal);
>
> // C99 6.4.6: Punctuators.
> case '?':
>
> Modified: cfe/trunk/lib/Lex/LiteralSupport.cpp
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Lex/LiteralSupport.cpp?rev=136210&r1=136209&r2=136210&view=diff
> ==============================================================================
> --- cfe/trunk/lib/Lex/LiteralSupport.cpp (original)
> +++ cfe/trunk/lib/Lex/LiteralSupport.cpp Wed Jul 27 00:40:30 2011
> @@ -28,12 +28,31 @@
> return -1;
> }
>
> +static unsigned getCharWidth(tok::TokenKind kind, const TargetInfo &Target) {
> + switch (kind) {
> + default: assert(0 && "Unknown token type!");
> + case tok::char_constant:
> + case tok::string_literal:
> + case tok::utf8_string_literal:
> + return Target.getCharWidth();
> + case tok::wide_char_constant:
> + case tok::wide_string_literal:
> + return Target.getWCharWidth();
> + case tok::utf16_char_constant:
> + case tok::utf16_string_literal:
> + return Target.getChar16Width();
> + case tok::utf32_char_constant:
> + case tok::utf32_string_literal:
> + return Target.getChar32Width();
> + }
> +}
> +
> /// ProcessCharEscape - Parse a standard C escape sequence, which can occur in
> /// either a character or a string literal.
> static unsigned ProcessCharEscape(const char *&ThisTokBuf,
> const char *ThisTokEnd, bool &HadError,
> - FullSourceLoc Loc, bool IsWide,
> - Diagnostic *Diags, const TargetInfo &Target) {
> + FullSourceLoc Loc, unsigned CharWidth,
> + Diagnostic *Diags) {
> // Skip the '\' char.
> ++ThisTokBuf;
>
> @@ -98,9 +117,6 @@
> }
>
> // See if any bits will be truncated when evaluated as a character.
> - unsigned CharWidth =
> - IsWide ? Target.getWCharWidth() : Target.getCharWidth();
> -
> if (CharWidth != 32 && (ResultChar >> CharWidth) != 0) {
> Overflow = true;
> ResultChar &= ~0U >> (32-CharWidth);
> @@ -128,9 +144,6 @@
> ThisTokBuf[0] >= '0' && ThisTokBuf[0] <= '7');
>
> // Check for overflow. Reject '\777', but not L'\777'.
> - unsigned CharWidth =
> - IsWide ? Target.getWCharWidth() : Target.getCharWidth();
> -
> if (CharWidth != 32 && (ResultChar >> CharWidth) != 0) {
> if (Diags)
> Diags->Report(Loc, diag::warn_octal_escape_too_large);
> @@ -219,8 +232,8 @@
> /// we will likely rework our support for UCN's.
> static void EncodeUCNEscape(const char *&ThisTokBuf, const char *ThisTokEnd,
> char *&ResultBuf, bool &HadError,
> - FullSourceLoc Loc, bool wide, Diagnostic *Diags,
> - const LangOptions &Features) {
> + FullSourceLoc Loc, unsigned CharByteWidth,
> + Diagnostic *Diags, const LangOptions &Features) {
> typedef uint32_t UTF32;
> UTF32 UcnVal = 0;
> unsigned short UcnLen = 0;
> @@ -230,19 +243,22 @@
> return;
> }
>
> - if (wide) {
> - (void)UcnLen;
> - assert((UcnLen== 4 || UcnLen== 8) && "only ucn length of 4 or 8 supported");
> + assert((CharByteWidth == 1 || CharByteWidth == 2 || CharByteWidth) &&
> + "only character widths of 1, 2, or 4 bytes supported");
>
> - if (!Features.ShortWChar) {
> - // Note: our internal rep of wide char tokens is always little-endian.
> - *ResultBuf++ = (UcnVal & 0x000000FF);
> - *ResultBuf++ = (UcnVal & 0x0000FF00) >> 8;
> - *ResultBuf++ = (UcnVal & 0x00FF0000) >> 16;
> - *ResultBuf++ = (UcnVal & 0xFF000000) >> 24;
> - return;
> - }
> + (void)UcnLen;
> + assert((UcnLen== 4 || UcnLen== 8) && "only ucn length of 4 or 8 supported");
> +
> + if (CharByteWidth == 4) {
> + // Note: our internal rep of wide char tokens is always little-endian.
> + *ResultBuf++ = (UcnVal & 0x000000FF);
> + *ResultBuf++ = (UcnVal & 0x0000FF00) >> 8;
> + *ResultBuf++ = (UcnVal & 0x00FF0000) >> 16;
> + *ResultBuf++ = (UcnVal & 0xFF000000) >> 24;
> + return;
> + }
>
> + if (CharByteWidth == 2) {
> // Convert to UTF16.
> if (UcnVal < (UTF32)0xFFFF) {
> *ResultBuf++ = (UcnVal & 0x000000FF);
> @@ -261,6 +277,9 @@
> *ResultBuf++ = (surrogate2 & 0x0000FF00) >> 8;
> return;
> }
> +
> + assert(CharByteWidth == 1 && "UTF-8 encoding is only for 1 byte characters");
> +
> // Now that we've parsed/checked the UCN, we convert from UTF32->UTF8.
> // The conversion below was inspired by:
> // http://www.unicode.org/Public/PROGRAMS/CVTUTF/ConvertUTF.c
> @@ -695,13 +714,18 @@
>
>
> CharLiteralParser::CharLiteralParser(const char *begin, const char *end,
> - SourceLocation Loc, Preprocessor &PP) {
> + SourceLocation Loc, Preprocessor &PP,
> + tok::TokenKind kind) {
> // At this point we know that the character matches the regex "L?'.*'".
> HadError = false;
>
> - // Determine if this is a wide character.
> - IsWide = begin[0] == 'L';
> - if (IsWide) ++begin;
> + Kind = kind;
> +
> + // Determine if this is a wide or UTF character.
> + if (Kind == tok::wide_char_constant || Kind == tok::utf16_char_constant ||
> + Kind == tok::utf32_char_constant) {
> + ++begin;
> + }
>
> // Skip over the entry quote.
> assert(begin[0] == '\'' && "Invalid token lexed");
> @@ -742,17 +766,17 @@
> ResultChar = utf32;
> } else {
> // Otherwise, this is a non-UCN escape character. Process it.
> + unsigned CharWidth = getCharWidth(Kind, PP.getTargetInfo());
> ResultChar = ProcessCharEscape(begin, end, HadError,
> FullSourceLoc(Loc,PP.getSourceManager()),
> - IsWide,
> - &PP.getDiagnostics(), PP.getTargetInfo());
> + CharWidth, &PP.getDiagnostics());
> }
> }
>
> // If this is a multi-character constant (e.g. 'abc'), handle it. These are
> // implementation defined (C99 6.4.4.4p10).
> if (NumCharsSoFar) {
> - if (IsWide) {
> + if (!isAscii()) {
> // Emulate GCC's (unintentional?) behavior: L'ab' -> L'b'.
> LitVal = 0;
> } else {
> @@ -774,8 +798,8 @@
> if (NumCharsSoFar > 1) {
> // Warn about discarding the top bits for multi-char wide-character
> // constants (L'abcd').
> - if (IsWide)
> - PP.Diag(Loc, diag::warn_extraneous_wide_char_constant);
> + if (!isAscii())
> + PP.Diag(Loc, diag::warn_extraneous_char_constant);
> else if (NumCharsSoFar != 4)
> PP.Diag(Loc, diag::ext_multichar_character_literal);
> else
> @@ -787,14 +811,15 @@
> // Transfer the value from APInt to uint64_t
> Value = LitVal.getZExtValue();
>
> - if (IsWide && PP.getLangOptions().ShortWChar && Value > 0xFFFF)
> + if (((isWide() && PP.getLangOptions().ShortWChar) || isUTF16()) &&
> + Value > 0xFFFF)
> PP.Diag(Loc, diag::warn_ucn_escape_too_large);
>
> // If this is a single narrow character, sign extend it (e.g. '\xFF' is "-1")
> // if 'char' is signed for this target (C99 6.4.4.4p10). Note that multiple
> // character constants are not sign extended in the this implementation:
> // '\xFF\xFF' = 65536 and '\x0\xFF' = 255, which matches GCC.
> - if (!IsWide && NumCharsSoFar == 1 && (Value & 128) &&
> + if (isAscii() && NumCharsSoFar == 1 && (Value & 128) &&
> PP.getLangOptions().CharIsSigned)
> Value = (signed char)Value;
> }
> @@ -839,8 +864,8 @@
> Preprocessor &PP, bool Complain)
> : SM(PP.getSourceManager()), Features(PP.getLangOptions()),
> Target(PP.getTargetInfo()), Diags(Complain ? &PP.getDiagnostics() : 0),
> - MaxTokenLength(0), SizeBound(0), wchar_tByteWidth(0),
> - ResultPtr(ResultBuf.data()), hadError(false), AnyWide(false), Pascal(false) {
> + MaxTokenLength(0), SizeBound(0), CharByteWidth(0), Kind(tok::unknown),
> + ResultPtr(ResultBuf.data()), hadError(false), Pascal(false) {
> init(StringToks, NumStringToks);
> }
>
> @@ -860,7 +885,7 @@
> MaxTokenLength = StringToks[0].getLength();
> assert(StringToks[0].getLength() >= 2 && "literal token is invalid!");
> SizeBound = StringToks[0].getLength()-2; // -2 for "".
> - AnyWide = StringToks[0].is(tok::wide_string_literal);
> + Kind = StringToks[0].getKind();
>
> hadError = false;
>
> @@ -881,8 +906,18 @@
> if (StringToks[i].getLength() > MaxTokenLength)
> MaxTokenLength = StringToks[i].getLength();
>
> - // Remember if we see any wide strings.
> - AnyWide |= StringToks[i].is(tok::wide_string_literal);
> + // Remember if we see any wide or utf-8/16/32 strings.
> + // Also check for illegal concatenations.
> + if (StringToks[i].isNot(Kind) && StringToks[i].isNot(tok::string_literal)) {
> + if (isAscii()) {
> + Kind = StringToks[i].getKind();
> + } else {
> + if (Diags)
> + Diags->Report(FullSourceLoc(StringToks[i].getLocation(), SM),
> + diag::err_unsupported_string_concat);
> + hadError = true;
> + }
> + }
> }
>
> // Include space for the null terminator.
> @@ -890,19 +925,14 @@
>
> // TODO: K&R warning: "traditional C rejects string constant concatenation"
>
> - // Get the width in bytes of wchar_t. If no wchar_t strings are used, do not
> - // query the target. As such, wchar_tByteWidth is only valid if AnyWide=true.
> - wchar_tByteWidth = ~0U;
> - if (AnyWide) {
> - wchar_tByteWidth = Target.getWCharWidth();
> - assert((wchar_tByteWidth & 7) == 0 && "Assumes wchar_t is byte multiple!");
> - wchar_tByteWidth /= 8;
> - }
> + // Get the width in bytes of char/wchar_t/char16_t/char32_t
> + CharByteWidth = getCharWidth(Kind, Target);
> + assert((CharByteWidth & 7) == 0 && "Assumes character size is byte multiple");
> + CharByteWidth /= 8;
>
> // The output buffer size needs to be large enough to hold wide characters.
> // This is a worst-case assumption which basically corresponds to L"" "long".
> - if (AnyWide)
> - SizeBound *= wchar_tByteWidth;
> + SizeBound *= CharByteWidth;
>
> // Size the temporary buffer to hold the result string data.
> ResultBuf.resize(SizeBound);
> @@ -927,18 +957,19 @@
> Lexer::getSpelling(StringToks[i], ThisTokBuf, SM, Features,
> &StringInvalid);
> if (StringInvalid) {
> - hadError = 1;
> + hadError = true;
> continue;
> }
>
> const char *ThisTokEnd = ThisTokBuf+ThisTokLen-1; // Skip end quote.
> - bool wide = false;
> // TODO: Input character set mapping support.
>
> // Skip L marker for wide strings.
> - if (ThisTokBuf[0] == 'L') {
> - wide = true;
> + if (ThisTokBuf[0] == 'L' || ThisTokBuf[0] == 'u' || ThisTokBuf[0] == 'U') {
> ++ThisTokBuf;
> + // Skip 8 of u8 marker for utf8 strings.
> + if (ThisTokBuf[0] == '8')
> + ++ThisTokBuf;
> }
>
> assert(ThisTokBuf[0] == '"' && "Expected quote, lexer broken?");
> @@ -967,7 +998,7 @@
>
> // Copy the character span over.
> unsigned Len = ThisTokBuf-InStart;
> - if (!AnyWide) {
> + if (CharByteWidth == 1) {
> memcpy(ResultPtr, InStart, Len);
> ResultPtr += Len;
> } else {
> @@ -975,7 +1006,7 @@
> for (; Len; --Len, ++InStart) {
> *ResultPtr++ = InStart[0];
> // Add zeros at the end.
> - for (unsigned i = 1, e = wchar_tByteWidth; i != e; ++i)
> + for (unsigned i = 1, e = CharByteWidth; i != e; ++i)
> *ResultPtr++ = 0;
> }
> }
> @@ -985,29 +1016,26 @@
> if (ThisTokBuf[1] == 'u' || ThisTokBuf[1] == 'U') {
> EncodeUCNEscape(ThisTokBuf, ThisTokEnd, ResultPtr,
> hadError, FullSourceLoc(StringToks[i].getLocation(),SM),
> - wide, Diags, Features);
> + CharByteWidth, Diags, Features);
> continue;
> }
> // Otherwise, this is a non-UCN escape character. Process it.
> unsigned ResultChar =
> ProcessCharEscape(ThisTokBuf, ThisTokEnd, hadError,
> FullSourceLoc(StringToks[i].getLocation(), SM),
> - AnyWide, Diags, Target);
> + CharByteWidth*8, Diags);
>
> // Note: our internal rep of wide char tokens is always little-endian.
> *ResultPtr++ = ResultChar & 0xFF;
>
> - if (AnyWide) {
> - for (unsigned i = 1, e = wchar_tByteWidth; i != e; ++i)
> - *ResultPtr++ = ResultChar >> i*8;
> - }
> + for (unsigned i = 1, e = CharByteWidth; i != e; ++i)
> + *ResultPtr++ = ResultChar >> i*8;
> }
> }
>
> if (Pascal) {
> ResultBuf[0] = ResultPtr-&ResultBuf[0]-1;
> - if (AnyWide)
> - ResultBuf[0] /= wchar_tByteWidth;
> + ResultBuf[0] /= CharByteWidth;
>
> // Verify that pascal strings aren't too large.
> if (GetStringLength() > 256) {
> @@ -1016,7 +1044,7 @@
> diag::err_pascal_string_too_long)
> << SourceRange(StringToks[0].getLocation(),
> StringToks[NumStringToks-1].getLocation());
> - hadError = 1;
> + hadError = true;
> return;
> }
> } else if (Diags) {
> @@ -1050,7 +1078,8 @@
> if (StringInvalid)
> return 0;
>
> - assert(SpellingPtr[0] != 'L' && "Doesn't handle wide strings yet");
> + assert(SpellingPtr[0] != 'L' && SpellingPtr[0] != 'u' &&
> + SpellingPtr[0] != 'U' && "Doesn't handle wide or utf strings yet");
>
>
> const char *SpellingStart = SpellingPtr;
> @@ -1075,7 +1104,7 @@
> bool HadError = false;
> ProcessCharEscape(SpellingPtr, SpellingEnd, HadError,
> FullSourceLoc(Tok.getLocation(), SM),
> - false, Diags, Target);
> + CharByteWidth*8, Diags);
> assert(!HadError && "This method isn't valid on erroneous strings");
> --ByteNo;
> }
>
> Modified: cfe/trunk/lib/Lex/MacroArgs.cpp
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Lex/MacroArgs.cpp?rev=136210&r1=136209&r2=136210&view=diff
> ==============================================================================
> --- cfe/trunk/lib/Lex/MacroArgs.cpp (original)
> +++ cfe/trunk/lib/Lex/MacroArgs.cpp Wed Jul 27 00:40:30 2011
> @@ -208,7 +208,13 @@
> // by 6.10.3.2p2.
> if (Tok.is(tok::string_literal) || // "foo"
> Tok.is(tok::wide_string_literal) || // L"foo"
> - Tok.is(tok::char_constant)) { // 'x' and L'x'.
> + Tok.is(tok::utf8_string_literal) || // u8"foo"
> + Tok.is(tok::utf16_string_literal) || // u"foo"
> + Tok.is(tok::utf32_string_literal) || // U"foo"
> + Tok.is(tok::char_constant) || // 'x'
> + Tok.is(tok::wide_char_constant) || // L'x'.
> + Tok.is(tok::utf16_char_constant) || // u'x'.
> + Tok.is(tok::utf32_char_constant)) { // U'x'.
> bool Invalid = false;
> std::string TokStr = PP.getSpelling(Tok, &Invalid);
> if (!Invalid) {
>
> Modified: cfe/trunk/lib/Lex/PPDirectives.cpp
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Lex/PPDirectives.cpp?rev=136210&r1=136209&r2=136210&view=diff
> ==============================================================================
> --- cfe/trunk/lib/Lex/PPDirectives.cpp (original)
> +++ cfe/trunk/lib/Lex/PPDirectives.cpp Wed Jul 27 00:40:30 2011
> @@ -777,7 +777,7 @@
> } else {
> // Parse and validate the string, converting it into a unique ID.
> StringLiteralParser Literal(&StrTok, 1, *this);
> - assert(!Literal.AnyWide && "Didn't allow wide strings in");
> + assert(Literal.isAscii() && "Didn't allow wide strings in");
> if (Literal.hadError)
> return DiscardUntilEndOfDirective();
> if (Literal.Pascal) {
> @@ -910,7 +910,7 @@
> } else {
> // Parse and validate the string, converting it into a unique ID.
> StringLiteralParser Literal(&StrTok, 1, *this);
> - assert(!Literal.AnyWide && "Didn't allow wide strings in");
> + assert(Literal.isAscii() && "Didn't allow wide strings in");
> if (Literal.hadError)
> return DiscardUntilEndOfDirective();
> if (Literal.Pascal) {
>
> Modified: cfe/trunk/lib/Lex/PPExpressions.cpp
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Lex/PPExpressions.cpp?rev=136210&r1=136209&r2=136210&view=diff
> ==============================================================================
> --- cfe/trunk/lib/Lex/PPExpressions.cpp (original)
> +++ cfe/trunk/lib/Lex/PPExpressions.cpp Wed Jul 27 00:40:30 2011
> @@ -236,7 +236,10 @@
> PP.LexNonComment(PeekTok);
> return false;
> }
> - case tok::char_constant: { // 'x'
> + case tok::char_constant: // 'x'
> + case tok::wide_char_constant: { // L'x'
> + case tok::utf16_char_constant: // u'x'
> + case tok::utf32_char_constant: // U'x'
> llvm::SmallString<32> CharBuffer;
> bool CharInvalid = false;
> StringRef ThisTok = PP.getSpelling(PeekTok, CharBuffer, &CharInvalid);
> @@ -244,7 +247,7 @@
> return true;
>
> CharLiteralParser Literal(ThisTok.begin(), ThisTok.end(),
> - PeekTok.getLocation(), PP);
> + PeekTok.getLocation(), PP, PeekTok.getKind());
> if (Literal.hadError())
> return true; // A diagnostic was already emitted.
>
> @@ -255,6 +258,10 @@
> NumBits = TI.getIntWidth();
> else if (Literal.isWide())
> NumBits = TI.getWCharWidth();
> + else if (Literal.isUTF16())
> + NumBits = TI.getChar16Width();
> + else if (Literal.isUTF32())
> + NumBits = TI.getChar32Width();
> else
> NumBits = TI.getCharWidth();
>
> @@ -262,8 +269,9 @@
> llvm::APSInt Val(NumBits);
> // Set the value.
> Val = Literal.getValue();
> - // Set the signedness.
> - Val.setIsUnsigned(!PP.getLangOptions().CharIsSigned);
> + // Set the signedness. UTF-16 and UTF-32 are always unsigned
> + if (!Literal.isUTF16() && !Literal.isUTF32())
> + Val.setIsUnsigned(!PP.getLangOptions().CharIsSigned);
>
> if (Result.Val.getBitWidth() > Val.getBitWidth()) {
> Result.Val = Val.extend(Result.Val.getBitWidth());
>
> Modified: cfe/trunk/lib/Lex/Pragma.cpp
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Lex/Pragma.cpp?rev=136210&r1=136209&r2=136210&view=diff
> ==============================================================================
> --- cfe/trunk/lib/Lex/Pragma.cpp (original)
> +++ cfe/trunk/lib/Lex/Pragma.cpp Wed Jul 27 00:40:30 2011
> @@ -444,7 +444,7 @@
>
> // Concatenate and parse the strings.
> StringLiteralParser Literal(&StrToks[0], StrToks.size(), *this);
> - assert(!Literal.AnyWide && "Didn't allow wide strings in");
> + assert(Literal.isAscii() && "Didn't allow wide strings in");
> if (Literal.hadError)
> return;
> if (Literal.Pascal) {
> @@ -520,7 +520,7 @@
>
> // Concatenate and parse the strings.
> StringLiteralParser Literal(&StrToks[0], StrToks.size(), *this);
> - assert(!Literal.AnyWide && "Didn't allow wide strings in");
> + assert(Literal.isAscii() && "Didn't allow wide strings in");
> if (Literal.hadError)
> return;
> if (Literal.Pascal) {
> @@ -902,7 +902,7 @@
>
> // Concatenate and parse the strings.
> StringLiteralParser Literal(&StrToks[0], StrToks.size(), PP);
> - assert(!Literal.AnyWide && "Didn't allow wide strings in");
> + assert(Literal.isAscii() && "Didn't allow wide strings in");
> if (Literal.hadError)
> return;
> if (Literal.Pascal) {
>
> Modified: cfe/trunk/lib/Lex/TokenConcatenation.cpp
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Lex/TokenConcatenation.cpp?rev=136210&r1=136209&r2=136210&view=diff
> ==============================================================================
> --- cfe/trunk/lib/Lex/TokenConcatenation.cpp (original)
> +++ cfe/trunk/lib/Lex/TokenConcatenation.cpp Wed Jul 27 00:40:30 2011
> @@ -17,42 +17,39 @@
> using namespace clang;
>
>
> -/// StartsWithL - Return true if the spelling of this token starts with 'L'.
> -bool TokenConcatenation::StartsWithL(const Token &Tok) const {
> - if (!Tok.needsCleaning()) {
> - SourceManager &SM = PP.getSourceManager();
> - return *SM.getCharacterData(SM.getSpellingLoc(Tok.getLocation())) == 'L';
> - }
> -
> - if (Tok.getLength() < 256) {
> - char Buffer[256];
> - const char *TokPtr = Buffer;
> - PP.getSpelling(Tok, TokPtr);
> - return TokPtr[0] == 'L';
> - }
> -
> - return PP.getSpelling(Tok)[0] == 'L';
> -}
> +/// IsIdentifierStringPrefix - Return true if the spelling of the token
> +/// is literally 'L', 'u', 'U', or 'u8'.
> +bool TokenConcatenation::IsIdentifierStringPrefix(const Token &Tok) const {
> + const LangOptions &LangOpts = PP.getLangOptions();
>
> -/// IsIdentifierL - Return true if the spelling of this token is literally
> -/// 'L'.
> -bool TokenConcatenation::IsIdentifierL(const Token &Tok) const {
> if (!Tok.needsCleaning()) {
> - if (Tok.getLength() != 1)
> + if (Tok.getLength() != 1 && Tok.getLength() != 2)
> return false;
> SourceManager &SM = PP.getSourceManager();
> - return *SM.getCharacterData(SM.getSpellingLoc(Tok.getLocation())) == 'L';
> + const char *Ptr = SM.getCharacterData(SM.getSpellingLoc(Tok.getLocation()));
> + if (Tok.getLength() == 1)
> + return Ptr[0] == 'L' ||
> + (LangOpts.CPlusPlus0x && (Ptr[0] == 'u' || Ptr[0] == 'U'));
> + if (Tok.getLength() == 2)
> + return LangOpts.CPlusPlus0x && Ptr[0] == 'u' && Ptr[1] == '8';
> }
>
> if (Tok.getLength() < 256) {
> char Buffer[256];
> const char *TokPtr = Buffer;
> - if (PP.getSpelling(Tok, TokPtr) != 1)
> - return false;
> - return TokPtr[0] == 'L';
> + unsigned length = PP.getSpelling(Tok, TokPtr);
> + if (length == 1)
> + return TokPtr[0] == 'L' ||
> + (LangOpts.CPlusPlus0x && (TokPtr[0] == 'u' || TokPtr[0] == 'U'));
> + if (length == 2)
> + return LangOpts.CPlusPlus0x && TokPtr[0] == 'u' && TokPtr[1] == '8';
> + return false;
> }
>
> - return PP.getSpelling(Tok) == "L";
> + std::string TokStr = PP.getSpelling(Tok);
> + return TokStr == "L" || (LangOpts.CPlusPlus0x && (TokStr == "u8" ||
> + TokStr == "u" ||
> + TokStr == "U"));
> }
>
> TokenConcatenation::TokenConcatenation(Preprocessor &pp) : PP(pp) {
> @@ -179,24 +176,19 @@
> if (Tok.is(tok::numeric_constant))
> return GetFirstChar(PP, Tok) != '.';
>
> - if (Tok.getIdentifierInfo() || Tok.is(tok::wide_string_literal) /* ||
> - Tok.is(tok::wide_char_literal)*/)
> + if (Tok.getIdentifierInfo() || Tok.is(tok::wide_string_literal) ||
> + Tok.is(tok::utf8_string_literal) || Tok.is(tok::utf16_string_literal) ||
> + Tok.is(tok::utf32_string_literal) || Tok.is(tok::wide_char_constant) ||
> + Tok.is(tok::utf16_char_constant) || Tok.is(tok::utf32_char_constant))
> return true;
>
> // If this isn't identifier + string, we're done.
> if (Tok.isNot(tok::char_constant) && Tok.isNot(tok::string_literal))
> return false;
>
> - // FIXME: need a wide_char_constant!
> -
> - // If the string was a wide string L"foo" or wide char L'f', it would
> - // concat with the previous identifier into fooL"bar". Avoid this.
> - if (StartsWithL(Tok))
> - return true;
> -
> // Otherwise, this is a narrow character or string. If the *identifier*
> - // is a literal 'L', avoid pasting L "foo" -> L"foo".
> - return IsIdentifierL(PrevTok);
> + // is a literal 'L', 'u8', 'u' or 'U', avoid pasting L "foo" -> L"foo".
> + return IsIdentifierStringPrefix(PrevTok);
> case tok::numeric_constant:
> return isalnum(FirstChar) || Tok.is(tok::numeric_constant) ||
> FirstChar == '+' || FirstChar == '-' || FirstChar == '.';
>
> Modified: cfe/trunk/lib/Parse/ParseCXXInlineMethods.cpp
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Parse/ParseCXXInlineMethods.cpp?rev=136210&r1=136209&r2=136210&view=diff
> ==============================================================================
> --- cfe/trunk/lib/Parse/ParseCXXInlineMethods.cpp (original)
> +++ cfe/trunk/lib/Parse/ParseCXXInlineMethods.cpp Wed Jul 27 00:40:30 2011
> @@ -553,6 +553,9 @@
>
> case tok::string_literal:
> case tok::wide_string_literal:
> + case tok::utf8_string_literal:
> + case tok::utf16_string_literal:
> + case tok::utf32_string_literal:
> Toks.push_back(Tok);
> ConsumeStringToken();
> break;
>
> Modified: cfe/trunk/lib/Parse/ParseExpr.cpp
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Parse/ParseExpr.cpp?rev=136210&r1=136209&r2=136210&view=diff
> ==============================================================================
> --- cfe/trunk/lib/Parse/ParseExpr.cpp (original)
> +++ cfe/trunk/lib/Parse/ParseExpr.cpp Wed Jul 27 00:40:30 2011
> @@ -769,6 +769,9 @@
> break;
> }
> case tok::char_constant: // constant: character-constant
> + case tok::wide_char_constant:
> + case tok::utf16_char_constant:
> + case tok::utf32_char_constant:
> Res = Actions.ActOnCharacterConstant(Tok);
> ConsumeToken();
> break;
> @@ -780,6 +783,9 @@
> break;
> case tok::string_literal: // primary-expression: string-literal
> case tok::wide_string_literal:
> + case tok::utf8_string_literal:
> + case tok::utf16_string_literal:
> + case tok::utf32_string_literal:
> Res = ParseStringLiteralExpression();
> break;
> case tok::kw__Generic: // primary-expression: generic-selection [C1X 6.5.1]
>
> Modified: cfe/trunk/lib/Parse/ParseTentative.cpp
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Parse/ParseTentative.cpp?rev=136210&r1=136209&r2=136210&view=diff
> ==============================================================================
> --- cfe/trunk/lib/Parse/ParseTentative.cpp (original)
> +++ cfe/trunk/lib/Parse/ParseTentative.cpp Wed Jul 27 00:40:30 2011
> @@ -605,8 +605,14 @@
> // Obviously starts an expression.
> case tok::numeric_constant:
> case tok::char_constant:
> + case tok::wide_char_constant:
> + case tok::utf16_char_constant:
> + case tok::utf32_char_constant:
> case tok::string_literal:
> case tok::wide_string_literal:
> + case tok::utf8_string_literal:
> + case tok::utf16_string_literal:
> + case tok::utf32_string_literal:
> case tok::l_square:
> case tok::l_paren:
> case tok::amp:
>
> Modified: cfe/trunk/lib/Parse/Parser.cpp
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Parse/Parser.cpp?rev=136210&r1=136209&r2=136210&view=diff
> ==============================================================================
> --- cfe/trunk/lib/Parse/Parser.cpp (original)
> +++ cfe/trunk/lib/Parse/Parser.cpp Wed Jul 27 00:40:30 2011
> @@ -298,6 +298,9 @@
>
> case tok::string_literal:
> case tok::wide_string_literal:
> + case tok::utf8_string_literal:
> + case tok::utf16_string_literal:
> + case tok::utf32_string_literal:
> ConsumeStringToken();
> break;
>
>
> Modified: cfe/trunk/lib/Rewrite/HTMLRewrite.cpp
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Rewrite/HTMLRewrite.cpp?rev=136210&r1=136209&r2=136210&view=diff
> ==============================================================================
> --- cfe/trunk/lib/Rewrite/HTMLRewrite.cpp (original)
> +++ cfe/trunk/lib/Rewrite/HTMLRewrite.cpp Wed Jul 27 00:40:30 2011
> @@ -397,8 +397,15 @@
> HighlightRange(RB, TokOffs, TokOffs+TokLen, BufferStart,
> "<span class='comment'>", "</span>");
> break;
> + case tok::utf8_string_literal:
> + // Chop off the u part of u8 prefix
> + ++TokOffs;
> + --TokLen;
> + // FALL THROUGH to chop the 8
> case tok::wide_string_literal:
> - // Chop off the L prefix
> + case tok::utf16_string_literal:
> + case tok::utf32_string_literal:
> + // Chop off the L, u, U or 8 prefix
> ++TokOffs;
> --TokLen;
> // FALL THROUGH.
>
> Modified: cfe/trunk/lib/Rewrite/RewriteObjC.cpp
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Rewrite/RewriteObjC.cpp?rev=136210&r1=136209&r2=136210&view=diff
> ==============================================================================
> --- cfe/trunk/lib/Rewrite/RewriteObjC.cpp (original)
> +++ cfe/trunk/lib/Rewrite/RewriteObjC.cpp Wed Jul 27 00:40:30 2011
> @@ -2111,8 +2111,8 @@
> std::string StrEncoding;
> Context->getObjCEncodingForType(Exp->getEncodedType(), StrEncoding);
> Expr *Replacement = StringLiteral::Create(*Context, StrEncoding,
> - false, false, StrType,
> - SourceLocation());
> + StringLiteral::Ascii, false,
> + StrType, SourceLocation());
> ReplaceStmt(Exp, Replacement);
>
> // Replace this subexpr in the parent.
> @@ -2129,8 +2129,8 @@
> QualType argType = Context->getPointerType(Context->CharTy);
> SelExprs.push_back(StringLiteral::Create(*Context,
> Exp->getSelector().getAsString(),
> - false, false, argType,
> - SourceLocation()));
> + StringLiteral::Ascii, false,
> + argType, SourceLocation()));
> CallExpr *SelExp = SynthesizeCallToFunctionDecl(SelGetUidFunctionDecl,
> &SelExprs[0], SelExprs.size());
> ReplaceStmt(Exp, SelExp);
> @@ -2797,7 +2797,8 @@
> QualType argType = Context->getPointerType(Context->CharTy);
> ClsExprs.push_back(StringLiteral::Create(*Context,
> ClassDecl->getIdentifier()->getName(),
> - false, false, argType, SourceLocation()));
> + StringLiteral::Ascii, false,
> + argType, SourceLocation()));
> CallExpr *Cls = SynthesizeCallToFunctionDecl(GetMetaClassFunctionDecl,
> &ClsExprs[0],
> ClsExprs.size(),
> @@ -2875,7 +2876,7 @@
> IdentifierInfo *clsName = Class->getIdentifier();
> ClsExprs.push_back(StringLiteral::Create(*Context,
> clsName->getName(),
> - false, false,
> + StringLiteral::Ascii, false,
> argType, SourceLocation()));
> CallExpr *Cls = SynthesizeCallToFunctionDecl(GetClassFunctionDecl,
> &ClsExprs[0],
> @@ -2906,7 +2907,8 @@
> QualType argType = Context->getPointerType(Context->CharTy);
> ClsExprs.push_back(StringLiteral::Create(*Context,
> ClassDecl->getIdentifier()->getName(),
> - false, false, argType, SourceLocation()));
> + StringLiteral::Ascii, false, argType,
> + SourceLocation()));
> CallExpr *Cls = SynthesizeCallToFunctionDecl(GetClassFunctionDecl,
> &ClsExprs[0],
> ClsExprs.size(),
> @@ -2987,7 +2989,8 @@
> QualType argType = Context->getPointerType(Context->CharTy);
> SelExprs.push_back(StringLiteral::Create(*Context,
> Exp->getSelector().getAsString(),
> - false, false, argType, SourceLocation()));
> + StringLiteral::Ascii, false,
> + argType, SourceLocation()));
> CallExpr *SelExp = SynthesizeCallToFunctionDecl(SelGetUidFunctionDecl,
> &SelExprs[0], SelExprs.size(),
> StartLoc,
>
> Modified: cfe/trunk/lib/Sema/SemaChecking.cpp
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaChecking.cpp?rev=136210&r1=136209&r2=136210&view=diff
> ==============================================================================
> --- cfe/trunk/lib/Sema/SemaChecking.cpp (original)
> +++ cfe/trunk/lib/Sema/SemaChecking.cpp Wed Jul 27 00:40:30 2011
> @@ -605,7 +605,7 @@
> Arg = Arg->IgnoreParenCasts();
> StringLiteral *Literal = dyn_cast<StringLiteral>(Arg);
>
> - if (!Literal || Literal->isWide()) {
> + if (!Literal || !Literal->isAscii()) {
> Diag(Arg->getLocStart(), diag::err_cfstring_literal_not_string_constant)
> << Arg->getSourceRange();
> return true;
> @@ -1805,7 +1805,7 @@
> bool isPrintf) {
>
> // CHECK: is the format string a wide literal?
> - if (FExpr->isWide()) {
> + if (!FExpr->isAscii()) {
> Diag(FExpr->getLocStart(),
> diag::warn_format_string_is_wide_literal)
> << OrigFormatExpr->getSourceRange();
>
> Modified: cfe/trunk/lib/Sema/SemaDeclAttr.cpp
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaDeclAttr.cpp?rev=136210&r1=136209&r2=136210&view=diff
> ==============================================================================
> --- cfe/trunk/lib/Sema/SemaDeclAttr.cpp (original)
> +++ cfe/trunk/lib/Sema/SemaDeclAttr.cpp Wed Jul 27 00:40:30 2011
> @@ -712,7 +712,7 @@
> Arg = Arg->IgnoreParenCasts();
> StringLiteral *Str = dyn_cast<StringLiteral>(Arg);
>
> - if (Str == 0 || Str->isWide()) {
> + if (!Str || !Str->isAscii()) {
> S.Diag(Attr.getLoc(), diag::err_attribute_argument_n_not_string)
> << "weakref" << 1;
> return;
> @@ -737,7 +737,7 @@
> Arg = Arg->IgnoreParenCasts();
> StringLiteral *Str = dyn_cast<StringLiteral>(Arg);
>
> - if (Str == 0 || Str->isWide()) {
> + if (!Str || !Str->isAscii()) {
> S.Diag(Attr.getLoc(), diag::err_attribute_argument_n_not_string)
> << "alias" << 1;
> return;
> @@ -1162,7 +1162,7 @@
> Arg = Arg->IgnoreParenCasts();
> StringLiteral *Str = dyn_cast<StringLiteral>(Arg);
>
> - if (Str == 0 || Str->isWide()) {
> + if (!Str || !Str->isAscii()) {
> S.Diag(Attr.getLoc(), diag::err_attribute_argument_n_not_string)
> << "visibility" << 1;
> return;
> @@ -2464,7 +2464,7 @@
> case AttributeList::AT_pcs: {
> Expr *Arg = Attr.getArg(0);
> StringLiteral *Str = dyn_cast<StringLiteral>(Arg);
> - if (Str == 0 || Str->isWide()) {
> + if (!Str || !Str->isAscii()) {
> S.Diag(Attr.getLoc(), diag::err_attribute_argument_n_not_string)
> << "pcs" << 1;
> Attr.setInvalid();
> @@ -2519,7 +2519,7 @@
> case AttributeList::AT_pcs: {
> Expr *Arg = attr.getArg(0);
> StringLiteral *Str = dyn_cast<StringLiteral>(Arg);
> - if (Str == 0 || Str->isWide()) {
> + if (!Str || !Str->isAscii()) {
> Diag(attr.getLoc(), diag::err_attribute_argument_n_not_string)
> << "pcs" << 1;
> attr.setInvalid();
> @@ -2868,7 +2868,7 @@
>
> Expr *Arg = Attr.getArg(0);
> StringLiteral *Str = dyn_cast<StringLiteral>(Arg);
> - if (Str == 0 || Str->isWide()) {
> + if (!Str || !Str->isAscii()) {
> S.Diag(Attr.getLoc(), diag::err_attribute_argument_n_not_string)
> << "uuid" << 1;
> return;
>
> Modified: cfe/trunk/lib/Sema/SemaExpr.cpp
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaExpr.cpp?rev=136210&r1=136209&r2=136210&view=diff
> ==============================================================================
> --- cfe/trunk/lib/Sema/SemaExpr.cpp (original)
> +++ cfe/trunk/lib/Sema/SemaExpr.cpp Wed Jul 27 00:40:30 2011
> @@ -997,11 +997,25 @@
> StringTokLocs.push_back(StringToks[i].getLocation());
>
> QualType StrTy = Context.CharTy;
> - if (Literal.AnyWide)
> + if (Literal.isWide())
> StrTy = Context.getWCharType();
> + else if (Literal.isUTF16())
> + StrTy = Context.Char16Ty;
> + else if (Literal.isUTF32())
> + StrTy = Context.Char32Ty;
> else if (Literal.Pascal)
> StrTy = Context.UnsignedCharTy;
>
> + StringLiteral::StringKind Kind = StringLiteral::Ascii;
> + if (Literal.isWide())
> + Kind = StringLiteral::Wide;
> + else if (Literal.isUTF8())
> + Kind = StringLiteral::UTF8;
> + else if (Literal.isUTF16())
> + Kind = StringLiteral::UTF16;
> + else if (Literal.isUTF32())
> + Kind = StringLiteral::UTF32;
> +
> // A C++ string literal has a const-qualified element type (C++ 2.13.4p1).
> if (getLangOptions().CPlusPlus || getLangOptions().ConstStrings)
> StrTy.addConst();
> @@ -1015,7 +1029,7 @@
>
> // Pass &StringTokLocs[0], StringTokLocs.size() to factory!
> return Owned(StringLiteral::Create(Context, Literal.GetString(),
> - Literal.AnyWide, Literal.Pascal, StrTy,
> + Kind, Literal.Pascal, StrTy,
> &StringTokLocs[0],
> StringTokLocs.size()));
> }
> @@ -2412,7 +2426,7 @@
> return ExprError();
>
> CharLiteralParser Literal(ThisTok.begin(), ThisTok.end(), Tok.getLocation(),
> - PP);
> + PP, Tok.getKind());
> if (Literal.hadError())
> return ExprError();
>
> @@ -2421,14 +2435,25 @@
> Ty = Context.IntTy; // 'x' and L'x' -> int in C.
> else if (Literal.isWide())
> Ty = Context.WCharTy; // L'x' -> wchar_t in C++.
> + else if (Literal.isUTF16())
> + Ty = Context.Char16Ty; // u'x' -> char16_t in C++0x.
> + else if (Literal.isUTF32())
> + Ty = Context.Char32Ty; // U'x' -> char32_t in C++0x.
> else if (Literal.isMultiChar())
> Ty = Context.IntTy; // 'wxyz' -> int in C++.
> else
> Ty = Context.CharTy; // 'x' -> char in C++
>
> - return Owned(new (Context) CharacterLiteral(Literal.getValue(),
> - Literal.isWide(),
> - Ty, Tok.getLocation()));
> + CharacterLiteral::CharacterKind Kind = CharacterLiteral::Ascii;
> + if (Literal.isWide())
> + Kind = CharacterLiteral::Wide;
> + else if (Literal.isUTF16())
> + Kind = CharacterLiteral::UTF16;
> + else if (Literal.isUTF32())
> + Kind = CharacterLiteral::UTF32;
> +
> + return Owned(new (Context) CharacterLiteral(Literal.getValue(), Kind, Ty,
> + Tok.getLocation()));
> }
>
> ExprResult Sema::ActOnNumericConstant(const Token &Tok) {
> @@ -8624,7 +8649,7 @@
>
> // Strip off any parens and casts.
> StringLiteral *SL = dyn_cast<StringLiteral>(SrcExpr->IgnoreParenCasts());
> - if (!SL || SL->isWide())
> + if (!SL || !SL->isAscii())
> return;
>
> Hint = FixItHint::CreateInsertion(SL->getLocStart(), "@");
>
> Modified: cfe/trunk/lib/Sema/SemaExprCXX.cpp
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaExprCXX.cpp?rev=136210&r1=136209&r2=136210&view=diff
> ==============================================================================
> --- cfe/trunk/lib/Sema/SemaExprCXX.cpp (original)
> +++ cfe/trunk/lib/Sema/SemaExprCXX.cpp Wed Jul 27 00:40:30 2011
> @@ -2041,12 +2041,20 @@
> = ToPtrType->getPointeeType()->getAs<BuiltinType>()) {
> // This conversion is considered only when there is an
> // explicit appropriate pointer target type (C++ 4.2p2).
> - if (!ToPtrType->getPointeeType().hasQualifiers() &&
> - ((StrLit->isWide() && ToPointeeType->isWideCharType()) ||
> - (!StrLit->isWide() &&
> - (ToPointeeType->getKind() == BuiltinType::Char_U ||
> - ToPointeeType->getKind() == BuiltinType::Char_S))))
> - return true;
> + if (!ToPtrType->getPointeeType().hasQualifiers()) {
> + switch (StrLit->getKind()) {
> + case StringLiteral::UTF8:
> + case StringLiteral::UTF16:
> + case StringLiteral::UTF32:
> + // We don't allow UTF literals to be implicitly converted
> + break;
> + case StringLiteral::Ascii:
> + return (ToPointeeType->getKind() == BuiltinType::Char_U ||
> + ToPointeeType->getKind() == BuiltinType::Char_S);
> + case StringLiteral::Wide:
> + return ToPointeeType->isWideCharType();
> + }
> + }
> }
>
> return false;
>
> Modified: cfe/trunk/lib/Sema/SemaExprObjC.cpp
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaExprObjC.cpp?rev=136210&r1=136209&r2=136210&view=diff
> ==============================================================================
> --- cfe/trunk/lib/Sema/SemaExprObjC.cpp (original)
> +++ cfe/trunk/lib/Sema/SemaExprObjC.cpp Wed Jul 27 00:40:30 2011
> @@ -47,8 +47,8 @@
> for (unsigned i = 0; i != NumStrings; ++i) {
> S = Strings[i];
>
> - // ObjC strings can't be wide.
> - if (S->isWide()) {
> + // ObjC strings can't be wide or UTF.
> + if (!S->isAscii()) {
> Diag(S->getLocStart(), diag::err_cfstring_literal_not_string_constant)
> << S->getSourceRange();
> return true;
> @@ -64,7 +64,7 @@
> // Create the aggregate string with the appropriate content and location
> // information.
> S = StringLiteral::Create(Context, StrBuf,
> - /*Wide=*/false, /*Pascal=*/false,
> + StringLiteral::Ascii, /*Pascal=*/false,
> Context.getPointerType(Context.CharTy),
> &StrLocs[0], StrLocs.size());
> }
>
> Modified: cfe/trunk/lib/Sema/SemaInit.cpp
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaInit.cpp?rev=136210&r1=136209&r2=136210&view=diff
> ==============================================================================
> --- cfe/trunk/lib/Sema/SemaInit.cpp (original)
> +++ cfe/trunk/lib/Sema/SemaInit.cpp Wed Jul 27 00:40:30 2011
> @@ -49,20 +49,30 @@
> if (SL == 0) return 0;
>
> QualType ElemTy = Context.getCanonicalType(AT->getElementType());
> - // char array can be initialized with a narrow string.
> - // Only allow char x[] = "foo"; not char x[] = L"foo";
> - if (!SL->isWide())
> +
> + switch (SL->getKind()) {
> + case StringLiteral::Ascii:
> + case StringLiteral::UTF8:
> + // char array can be initialized with a narrow string.
> + // Only allow char x[] = "foo"; not char x[] = L"foo";
> return ElemTy->isCharType() ? Init : 0;
> + case StringLiteral::UTF16:
> + return ElemTy->isChar16Type() ? Init : 0;
> + case StringLiteral::UTF32:
> + return ElemTy->isChar32Type() ? Init : 0;
> + case StringLiteral::Wide:
> + // wchar_t array can be initialized with a wide string: C99 6.7.8p15 (with
> + // correction from DR343): "An array with element type compatible with a
> + // qualified or unqualified version of wchar_t may be initialized by a wide
> + // string literal, optionally enclosed in braces."
> + if (Context.typesAreCompatible(Context.getWCharType(),
> + ElemTy.getUnqualifiedType()))
> + return Init;
>
> - // wchar_t array can be initialized with a wide string: C99 6.7.8p15 (with
> - // correction from DR343): "An array with element type compatible with a
> - // qualified or unqualified version of wchar_t may be initialized by a wide
> - // string literal, optionally enclosed in braces."
> - if (Context.typesAreCompatible(Context.getWCharType(),
> - ElemTy.getUnqualifiedType()))
> - return Init;
> + return 0;
> + }
>
> - return 0;
> + llvm_unreachable("missed a StringLiteral kind?");
> }
>
> static Expr *IsStringInit(Expr *init, QualType declType, ASTContext &Context) {
>
> Modified: cfe/trunk/lib/Sema/SemaStmt.cpp
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaStmt.cpp?rev=136210&r1=136209&r2=136210&view=diff
> ==============================================================================
> --- cfe/trunk/lib/Sema/SemaStmt.cpp (original)
> +++ cfe/trunk/lib/Sema/SemaStmt.cpp Wed Jul 27 00:40:30 2011
> @@ -1952,13 +1952,13 @@
> SmallVector<TargetInfo::ConstraintInfo, 4> OutputConstraintInfos;
>
> // The parser verifies that there is a string literal here.
> - if (AsmString->isWide())
> + if (!AsmString->isAscii())
> return StmtError(Diag(AsmString->getLocStart(),diag::err_asm_wide_character)
> << AsmString->getSourceRange());
>
> for (unsigned i = 0; i != NumOutputs; i++) {
> StringLiteral *Literal = Constraints[i];
> - if (Literal->isWide())
> + if (!Literal->isAscii())
> return StmtError(Diag(Literal->getLocStart(),diag::err_asm_wide_character)
> << Literal->getSourceRange());
>
> @@ -1987,7 +1987,7 @@
>
> for (unsigned i = NumOutputs, e = NumOutputs + NumInputs; i != e; i++) {
> StringLiteral *Literal = Constraints[i];
> - if (Literal->isWide())
> + if (!Literal->isAscii())
> return StmtError(Diag(Literal->getLocStart(),diag::err_asm_wide_character)
> << Literal->getSourceRange());
>
> @@ -2034,7 +2034,7 @@
> // Check that the clobbers are valid.
> for (unsigned i = 0; i != NumClobbers; i++) {
> StringLiteral *Literal = Clobbers[i];
> - if (Literal->isWide())
> + if (!Literal->isAscii())
> return StmtError(Diag(Literal->getLocStart(),diag::err_asm_wide_character)
> << Literal->getSourceRange());
>
>
> Modified: cfe/trunk/lib/Sema/SemaTemplate.cpp
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaTemplate.cpp?rev=136210&r1=136209&r2=136210&view=diff
> ==============================================================================
> --- cfe/trunk/lib/Sema/SemaTemplate.cpp (original)
> +++ cfe/trunk/lib/Sema/SemaTemplate.cpp Wed Jul 27 00:40:30 2011
> @@ -4131,10 +4131,22 @@
> assert(Arg.getKind() == TemplateArgument::Integral &&
> "Operation is only valid for integral template arguments");
> QualType T = Arg.getIntegralType();
> - if (T->isCharType() || T->isWideCharType())
> + if (T->isAnyCharacterType()) {
> + CharacterLiteral::CharacterKind Kind;
> + if (T->isWideCharType())
> + Kind = CharacterLiteral::Wide;
> + else if (T->isChar16Type())
> + Kind = CharacterLiteral::UTF16;
> + else if (T->isChar32Type())
> + Kind = CharacterLiteral::UTF32;
> + else
> + Kind = CharacterLiteral::Ascii;
> +
> return Owned(new (Context) CharacterLiteral(
> - Arg.getAsIntegral()->getZExtValue(),
> - T->isWideCharType(), T, Loc));
> + Arg.getAsIntegral()->getZExtValue(),
> + Kind, T, Loc));
> + }
> +
> if (T->isBooleanType())
> return Owned(new (Context) CXXBoolLiteralExpr(
> Arg.getAsIntegral()->getBoolValue(),
>
> Modified: cfe/trunk/lib/Serialization/ASTReaderStmt.cpp
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Serialization/ASTReaderStmt.cpp?rev=136210&r1=136209&r2=136210&view=diff
> ==============================================================================
> --- cfe/trunk/lib/Serialization/ASTReaderStmt.cpp (original)
> +++ cfe/trunk/lib/Serialization/ASTReaderStmt.cpp Wed Jul 27 00:40:30 2011
> @@ -371,7 +371,7 @@
> assert(Record[Idx] == E->getNumConcatenated() &&
> "Wrong number of concatenated tokens!");
> ++Idx;
> - E->IsWide = Record[Idx++];
> + E->Kind = static_cast<StringLiteral::StringKind>(Record[Idx++]);
> E->IsPascal = Record[Idx++];
>
> // Read string data
> @@ -388,7 +388,7 @@
> VisitExpr(E);
> E->setValue(Record[Idx++]);
> E->setLocation(ReadSourceLocation(Record, Idx));
> - E->setWide(Record[Idx++]);
> + E->setKind(static_cast<CharacterLiteral::CharacterKind>(Record[Idx++]));
> }
>
> void ASTStmtReader::VisitParenExpr(ParenExpr *E) {
>
> Modified: cfe/trunk/lib/Serialization/ASTWriterStmt.cpp
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Serialization/ASTWriterStmt.cpp?rev=136210&r1=136209&r2=136210&view=diff
> ==============================================================================
> --- cfe/trunk/lib/Serialization/ASTWriterStmt.cpp (original)
> +++ cfe/trunk/lib/Serialization/ASTWriterStmt.cpp Wed Jul 27 00:40:30 2011
> @@ -324,7 +324,7 @@
> VisitExpr(E);
> Record.push_back(E->getByteLength());
> Record.push_back(E->getNumConcatenated());
> - Record.push_back(E->isWide());
> + Record.push_back(E->getKind());
> Record.push_back(E->isPascal());
> // FIXME: String data should be stored as a blob at the end of the
> // StringLiteral. However, we can't do so now because we have no
> @@ -340,7 +340,7 @@
> VisitExpr(E);
> Record.push_back(E->getValue());
> Writer.AddSourceLocation(E->getLocation(), Record);
> - Record.push_back(E->isWide());
> + Record.push_back(E->getKind());
>
> AbbrevToUse = Writer.getCharacterLiteralAbbrev();
>
>
> Modified: cfe/trunk/test/CXX/lex/lex.literal/lex.ccon/p1.cpp
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CXX/lex/lex.literal/lex.ccon/p1.cpp?rev=136210&r1=136209&r2=136210&view=diff
> ==============================================================================
> --- cfe/trunk/test/CXX/lex/lex.literal/lex.ccon/p1.cpp (original)
> +++ cfe/trunk/test/CXX/lex/lex.literal/lex.ccon/p1.cpp Wed Jul 27 00:40:30 2011
> @@ -1,4 +1,5 @@
> -// RUN: %clang_cc1 -fsyntax-only -verify %s
> +// RUN: %clang_cc1 -std=c++0x -fsyntax-only -verify %s
> +// Runs in c++0x mode so that char16_t and char32_t are available.
>
> // Check types of char literals
> extern char a;
> @@ -7,3 +8,7 @@
> extern __typeof('asdf') b;
> extern wchar_t c;
> extern __typeof(L'a') c;
> +extern char16_t d;
> +extern __typeof(u'a') d;
> +extern char32_t e;
> +extern __typeof(U'a') e;
>
> Modified: cfe/trunk/test/CodeGen/char-literal.c
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/char-literal.c?rev=136210&r1=136209&r2=136210&view=diff
> ==============================================================================
> --- cfe/trunk/test/CodeGen/char-literal.c (original)
> +++ cfe/trunk/test/CodeGen/char-literal.c Wed Jul 27 00:40:30 2011
> @@ -1,5 +1,5 @@
> -// RUN: %clang_cc1 -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s
> -// Runs in c++ mode so that wchar_t is available.
> +// RUN: %clang_cc1 -x c++ -std=c++0x -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s
> +// Runs in c++0x mode so that wchar_t, char16_t, and char32_t are available.
>
> int main() {
> // CHECK: store i8 97
> @@ -16,6 +16,20 @@
> // CHECK: store i32 98
> wchar_t wb = L'ab';
>
> + // CHECK: store i16 97
> + char16_t ua = u'a';
> +
> + // Should pick second character.
> + // CHECK: store i16 98
> + char16_t ub = u'ab';
> +
> + // CHECK: store i32 97
> + char32_t Ua = U'a';
> +
> + // Should pick second character.
> + // CHECK: store i32 98
> + char32_t Ub = U'ab';
> +
> // Should pick last character and store its lowest byte.
> // This does not match gcc, which takes the last character, converts it to
> // utf8, and then picks the second-lowest byte of that (they probably store
> @@ -26,10 +40,36 @@
> // CHECK: store i32 61451
> wchar_t wc = L'\uF00B';
>
> + // -4085 == 0xf00b
> + // CHECK: store i16 -4085
> + char16_t uc = u'\uF00B';
> +
> + // CHECK: store i32 61451
> + char32_t Uc = U'\uF00B';
> +
> // CHECK: store i32 1110027
> wchar_t wd = L'\U0010F00B';
>
> + // Should take lower word of the 4-byte UCN sequence. This does not match
> + // gcc. I don't understand what gcc does (it looks like it converts to utf16,
> + // then takes the second (!) utf16 word, swaps the lower two nibbles, and
> + // stores that?).
> + // CHECK: store i16 -4085
> + char16_t ud = u'\U0010F00B'; // has utf16 encoding dbc8 dcb0
> +
> + // CHECK: store i32 1110027
> + char32_t Ud = U'\U0010F00B';
> +
> // Should pick second character.
> // CHECK: store i32 1110027
> wchar_t we = L'\u1234\U0010F00B';
> +
> + // Should pick second character.
> + // CHECK: store i16 -4085
> + char16_t ue = u'\u1234\U0010F00B';
> +
> + // Should pick second character.
> + // CHECK: store i32 1110027
> + char32_t Ue = U'\u1234\U0010F00B';
> +
> }
>
> Modified: cfe/trunk/test/CodeGen/string-literal.c
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/string-literal.c?rev=136210&r1=136209&r2=136210&view=diff
> ==============================================================================
> --- cfe/trunk/test/CodeGen/string-literal.c (original)
> +++ cfe/trunk/test/CodeGen/string-literal.c Wed Jul 27 00:40:30 2011
> @@ -1,4 +1,5 @@
> -// RUN: %clang_cc1 -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s
> +// RUN: %clang_cc1 -x c++ -std=c++0x -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s
> +// Runs in c++0x mode so that wchar_t, char16_t, and char32_t are available.
>
> int main() {
> // CHECK: internal unnamed_addr constant [10 x i8] c"abc\00\00\00\00\00\00\00", align 1
> @@ -9,8 +10,24 @@
> char b[10] = "\u1120\u0220\U00102030";
>
> // CHECK: private unnamed_addr constant [12 x i8] c"A\00\00\00B\00\00\00\00\00\00\00", align 1
> - void *foo = L"AB";
> + const wchar_t *foo = L"AB";
>
> // CHECK: private unnamed_addr constant [12 x i8] c"4\12\00\00\0B\F0\10\00\00\00\00\00", align 1
> - void *bar = L"\u1234\U0010F00B";
> + const wchar_t *bar = L"\u1234\U0010F00B";
> +
> + // CHECK: private unnamed_addr constant [12 x i8] c"C\00\00\00D\00\00\00\00\00\00\00", align 1
> + const char32_t *c = U"CD";
> +
> + // CHECK: private unnamed_addr constant [12 x i8] c"5\12\00\00\0C\F0\10\00\00\00\00\00", align 1
> + const char32_t *d = U"\u1235\U0010F00C";
> +
> + // CHECK: private unnamed_addr constant [6 x i8] c"E\00F\00\00\00", align 1
> + const char16_t *e = u"EF";
> +
> + // This should convert to utf16.
> + // CHECK: private unnamed_addr constant [10 x i8] c" \11 \02\C8\DB0\DC\00\00", align 1
> + const char16_t *f = u"\u1120\u0220\U00102030";
> +
> + // CHECK: private unnamed_addr constant [4 x i8] c"def\00", align 1
> + const char *g = u8"def";
> }
>
> Modified: cfe/trunk/test/Lexer/wchar.c
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Lexer/wchar.c?rev=136210&r1=136209&r2=136210&view=diff
> ==============================================================================
> --- cfe/trunk/test/Lexer/wchar.c (original)
> +++ cfe/trunk/test/Lexer/wchar.c Wed Jul 27 00:40:30 2011
> @@ -5,8 +5,8 @@
>
> (void)L'\U00010000'; // expected-warning {{character unicode escape sequence too long for its type}}
>
> - (void)L'ab'; // expected-warning {{extraneous characters in wide character constant ignored}}
> + (void)L'ab'; // expected-warning {{extraneous characters in character constant ignored}}
>
> - (void)L'a\u1000'; // expected-warning {{extraneous characters in wide character constant ignored}}
> + (void)L'a\u1000'; // expected-warning {{extraneous characters in character constant ignored}}
> }
>
>
> Modified: cfe/trunk/test/Parser/char-literal-printing.c
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Parser/char-literal-printing.c?rev=136210&r1=136209&r2=136210&view=diff
> ==============================================================================
> --- cfe/trunk/test/Parser/char-literal-printing.c (original)
> +++ cfe/trunk/test/Parser/char-literal-printing.c Wed Jul 27 00:40:30 2011
> @@ -1,6 +1,5 @@
> -// RUN: %clang_cc1 -ast-print %s
> -
> -#include <stddef.h>
> +// RUN: %clang_cc1 -x c++ -std=c++0x -ast-print %s
> +// Runs in c++0x mode so that wchar_t, char16_t, and char32_t are available.
>
> char test1(void) { return '\\'; }
> wchar_t test2(void) { return L'\\'; }
> @@ -29,3 +28,35 @@
> wchar_t test24(void) { return L'\x3'; }
>
> wchar_t test25(void) { return L'\x333'; }
> +
> +char16_t test26(void) { return u'\\'; }
> +char16_t test27(void) { return u'\''; }
> +char16_t test28(void) { return u'\a'; }
> +char16_t test29(void) { return u'\b'; }
> +char16_t test30(void) { return u'\e'; }
> +char16_t test31(void) { return u'\f'; }
> +char16_t test32(void) { return u'\n'; }
> +char16_t test33(void) { return u'\r'; }
> +char16_t test34(void) { return u'\t'; }
> +char16_t test35(void) { return u'\v'; }
> +
> +char16_t test36(void) { return u'c'; }
> +char16_t test37(void) { return u'\x3'; }
> +
> +char16_t test38(void) { return u'\x333'; }
> +
> +char32_t test39(void) { return U'\\'; }
> +char32_t test40(void) { return U'\''; }
> +char32_t test41(void) { return U'\a'; }
> +char32_t test42(void) { return U'\b'; }
> +char32_t test43(void) { return U'\e'; }
> +char32_t test44(void) { return U'\f'; }
> +char32_t test45(void) { return U'\n'; }
> +char32_t test46(void) { return U'\r'; }
> +char32_t test47(void) { return U'\t'; }
> +char32_t test48(void) { return U'\v'; }
> +
> +char32_t test49(void) { return U'c'; }
> +char32_t test50(void) { return U'\x3'; }
> +
> +char32_t test51(void) { return U'\x333'; }
>
> Modified: cfe/trunk/test/SemaCXX/type-convert-construct.cpp
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/SemaCXX/type-convert-construct.cpp?rev=136210&r1=136209&r2=136210&view=diff
> ==============================================================================
> --- cfe/trunk/test/SemaCXX/type-convert-construct.cpp (original)
> +++ cfe/trunk/test/SemaCXX/type-convert-construct.cpp Wed Jul 27 00:40:30 2011
> @@ -1,4 +1,5 @@
> -// RUN: %clang_cc1 -fsyntax-only -verify %s
> +// RUN: %clang_cc1 -std=gnu++0x -fsyntax-only -verify %s
> +// Runs in c++0x mode so that char16_t and char32_t are available.
>
> void f() {
> float v1 = float(1);
> @@ -14,4 +15,8 @@
> str = "a string"; // expected-warning{{conversion from string literal to 'char *' is deprecated}}
> wchar_t *wstr;
> wstr = L"a wide string"; // expected-warning{{conversion from string literal to 'wchar_t *' is deprecated}}
> + char16_t *ustr;
> + ustr = u"a UTF-16 string"; // expected-error {{assigning to 'char16_t *' from incompatible type 'const char16_t [16]'}}
> + char32_t *Ustr;
> + Ustr = U"a UTF-32 string"; // expected-error {{assigning to 'char32_t *' from incompatible type 'const char32_t [16]'}}
> }
Now that these tests run with -std=c++0x, are there any tests left that cover wide characters in non-C++0x mode? Should there be?
Nico
More information about the cfe-commits mailing list