[cfe-commits] r38960 - in /cfe/cfe/trunk: AST/ASTStreamer.cpp AST/Expr.cpp AST/Sema.cpp Lex/Lexer.cpp Lex/MacroExpander.cpp Lex/Pragma.cpp Lex/Preprocessor.cpp Parse/ParseExpr.cpp Parse/ParseStmt.cpp Parse/Parser.cpp Sema/ASTStreamer.cpp Sema/Sema.cpp clang.xcodeproj/project.pbxproj include/clang/AST/Expr.h include/clang/Basic/DiagnosticKinds.def include/clang/Basic/TokenKinds.def include/clang/Lex/Lexer.h include/clang/Parse/Action.h include/clang/Parse/Parser.h

Wed Jul 11 09:26:15 PDT 2007

Author: sabre
Date: Wed Jul 11 11:26:15 2007
New Revision: 38960

URL: http://llvm.org/viewvc/llvm-project?rev=38960&view=rev
Log:
Initial support for semantic analysis and AST building for StringExpr nodes.

Modified:
    cfe/cfe/trunk/AST/ASTStreamer.cpp
    cfe/cfe/trunk/AST/Expr.cpp
    cfe/cfe/trunk/AST/Sema.cpp
    cfe/cfe/trunk/Lex/Lexer.cpp
    cfe/cfe/trunk/Lex/MacroExpander.cpp
    cfe/cfe/trunk/Lex/Pragma.cpp
    cfe/cfe/trunk/Lex/Preprocessor.cpp
    cfe/cfe/trunk/Parse/ParseExpr.cpp
    cfe/cfe/trunk/Parse/ParseStmt.cpp
    cfe/cfe/trunk/Parse/Parser.cpp
    cfe/cfe/trunk/Sema/ASTStreamer.cpp
    cfe/cfe/trunk/Sema/Sema.cpp
    cfe/cfe/trunk/clang.xcodeproj/project.pbxproj
    cfe/cfe/trunk/include/clang/AST/Expr.h
    cfe/cfe/trunk/include/clang/Basic/DiagnosticKinds.def
    cfe/cfe/trunk/include/clang/Basic/TokenKinds.def
    cfe/cfe/trunk/include/clang/Lex/Lexer.h
    cfe/cfe/trunk/include/clang/Parse/Action.h
    cfe/cfe/trunk/include/clang/Parse/Parser.h

Modified: cfe/cfe/trunk/AST/ASTStreamer.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/cfe/trunk/AST/ASTStreamer.cpp?rev=38960&r1=38959&r2=38960&view=diff

==============================================================================

--- cfe/cfe/trunk/AST/ASTStreamer.cpp (original)
+++ cfe/cfe/trunk/AST/ASTStreamer.cpp Wed Jul 11 11:26:15 2007
@@ -19,7 +19,7 @@
 
 /// Interface to the Builder.cpp file.
 ///
-Action *CreateASTBuilderActions(bool FullLocInfo);
+Action *CreateASTBuilderActions(Preprocessor &PP, bool FullLocInfo);
 
 
 namespace {
@@ -27,7 +27,7 @@
     Parser P;
   public:
     ASTStreamer(Preprocessor &PP, unsigned MainFileID, bool FullLocInfo)
-      : P(PP, *CreateASTBuilderActions(FullLocInfo)) {
+      : P(PP, *CreateASTBuilderActions(PP, FullLocInfo)) {
       PP.EnterSourceFile(MainFileID, 0, true);
       
       // Initialize the parser.

Modified: cfe/cfe/trunk/AST/Expr.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/cfe/trunk/AST/Expr.cpp?rev=38960&r1=38959&r2=38960&view=diff

==============================================================================
--- cfe/cfe/trunk/AST/Expr.cpp (original)
+++ cfe/cfe/trunk/AST/Expr.cpp Wed Jul 11 11:26:15 2007
@@ -43,6 +43,28 @@
   std::cerr << "1.0";
 }
 
+
+
+StringExpr::StringExpr(const char *strData, unsigned byteLength, bool Wide) {
+  // OPTIMIZE: could allocate this appended to the StringExpr.
+  char *AStrData = new char[byteLength];
+  memcpy(AStrData, strData, byteLength);
+  StrData = AStrData;
+  ByteLength = byteLength;
+  isWide = Wide;
+}
+
+StringExpr::~StringExpr() {
+  delete[] StrData;
+}
+
+void StringExpr::dump_impl() const {
+  if (isWide) std::cerr << 'L';
+  std::cerr << '"' << StrData << '"';
+}
+
+
+
 void ParenExpr::dump_impl() const {
   std::cerr << "'('";
   Val->dump();

Modified: cfe/cfe/trunk/AST/Sema.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/cfe/trunk/AST/Sema.cpp?rev=38960&r1=38959&r2=38960&view=diff

==============================================================================
--- cfe/cfe/trunk/AST/Sema.cpp (original)
+++ cfe/cfe/trunk/AST/Sema.cpp Wed Jul 11 11:26:15 2007
@@ -18,18 +18,23 @@
 #include "clang/Parse/Scope.h"
 #include "clang/Lex/IdentifierTable.h"
 #include "clang/Lex/LexerToken.h"
-#include "llvm/Support/Visibility.h"
+#include "clang/Lex/Preprocessor.h"
+#include "llvm/Support/Compiler.h"
 using namespace llvm;
 using namespace clang;
 
 /// ASTBuilder
 namespace {
 class VISIBILITY_HIDDEN ASTBuilder : public Action {
+  Preprocessor &PP;
+  
   /// FullLocInfo - If this is true, the ASTBuilder constructs AST Nodes that
   /// capture maximal location information for each source-language construct.
   bool FullLocInfo;
 public:
-  ASTBuilder(bool fullLocInfo) : FullLocInfo(fullLocInfo) {}
+  ASTBuilder(Preprocessor &pp, bool fullLocInfo)
+    : PP(pp), FullLocInfo(fullLocInfo) {}
+  
   //===--------------------------------------------------------------------===//
   // Symbol table tracking callbacks.
   //
@@ -47,6 +52,9 @@
   virtual ExprResult ParseFloatingConstant(const LexerToken &Tok);
   virtual ExprResult ParseParenExpr(SourceLocation L, SourceLocation R,
                                     ExprTy *Val);
+  virtual ExprResult ParseStringExpr(const char *StrData, unsigned StrLen,
+                                     bool isWide,
+                                     const LexerToken *Toks, unsigned NumToks);
   
   // Binary/Unary Operators.  'Tok' is the token for the operator.
   virtual ExprResult ParseUnaryOp(const LexerToken &Tok, ExprTy *Input);
@@ -166,6 +174,26 @@
   return new ParenExpr(L, R, (Expr*)Val);
 }
 
+/// ParseStringExpr - This accepts a string after semantic analysis. This string
+/// may be the result of string concatenation ([C99 5.1.1.2, translation phase
+/// #6]), so it may come from multiple tokens.
+/// 
+Action::ExprResult ASTBuilder::
+ParseStringExpr(const char *StrData, unsigned StrLen, bool isWide,
+                const LexerToken *Toks, unsigned NumToks) {
+  assert(NumToks && "Must have at least one string!");
+  
+  if (!FullLocInfo)
+    return new StringExpr(StrData, StrLen, isWide);
+  else {
+    SmallVector<SourceLocation, 4> Locs;
+    for (unsigned i = 0; i != NumToks; ++i)
+      Locs.push_back(Toks[i].getLocation());
+    return new StringExprLOC(StrData, StrLen, isWide, &Locs[0], Locs.size());
+  }
+}
+
+
 // Unary Operators.  'Tok' is the token for the operator.
 Action::ExprResult ASTBuilder::ParseUnaryOp(const LexerToken &Tok,
                                             ExprTy *Input) {
@@ -326,8 +354,8 @@
 
 /// Interface to the Builder.cpp file.
 ///
-Action *CreateASTBuilderActions(bool FullLocInfo) {
-  return new ASTBuilder(FullLocInfo);
+Action *CreateASTBuilderActions(Preprocessor &PP, bool FullLocInfo) {
+  return new ASTBuilder(PP, FullLocInfo);
 }
 
 

Modified: cfe/cfe/trunk/Lex/Lexer.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/cfe/trunk/Lex/Lexer.cpp?rev=38960&r1=38959&r2=38960&view=diff

==============================================================================
--- cfe/cfe/trunk/Lex/Lexer.cpp (original)
+++ cfe/cfe/trunk/Lex/Lexer.cpp Wed Jul 11 11:26:15 2007
@@ -444,7 +444,7 @@
 
 /// LexStringLiteral - Lex the remainder of a string literal, after having lexed
 /// either " or L".
-void Lexer::LexStringLiteral(LexerToken &Result, const char *CurPtr) {
+void Lexer::LexStringLiteral(LexerToken &Result, const char *CurPtr, bool Wide){
   const char *NulCharacter = 0; // Does this string contain the \0 character?
   
   char C = getAndAdvanceChar(CurPtr, Result);
@@ -468,7 +468,7 @@
   // If a nul character existed in the string, warn about it.
   if (NulCharacter) Diag(NulCharacter, diag::null_in_string);
 
-  Result.SetKind(tok::string_literal);
+  Result.SetKind(Wide ? tok::wide_string_literal : tok::string_literal);
 
   // Update the location of the token as well as the BufferPtr instance var.
   FormTokenWithChars(Result, CurPtr);
@@ -1104,7 +1104,8 @@
 
     // Wide string literal.
     if (Char == '"')
-      return LexStringLiteral(Result, ConsumeChar(CurPtr, SizeTmp, Result));
+      return LexStringLiteral(Result, ConsumeChar(CurPtr, SizeTmp, Result),
+                              true);
 
     // Wide character constant.
     if (Char == '\'')
@@ -1143,7 +1144,7 @@
   case '"':
     // Notify MIOpt that we read a non-whitespace/non-comment token.
     MIOpt.ReadToken();
-    return LexStringLiteral(Result, CurPtr);
+    return LexStringLiteral(Result, CurPtr, false);
 
   // C99 6.4.6: Punctuators.
   case '?':

Modified: cfe/cfe/trunk/Lex/MacroExpander.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/cfe/trunk/Lex/MacroExpander.cpp?rev=38960&r1=38959&r2=38960&view=diff

==============================================================================
--- cfe/cfe/trunk/Lex/MacroExpander.cpp (original)
+++ cfe/cfe/trunk/Lex/MacroExpander.cpp Wed Jul 11 11:26:15 2007
@@ -161,8 +161,9 @@
     
     // If this is a string or character constant, escape the token as specified
     // by 6.10.3.2p2.
-    if (Tok.getKind() == tok::string_literal ||  // "foo" and L"foo".
-        Tok.getKind() == tok::char_constant) {   // 'x' and L'x'.
+    if (Tok.getKind() == tok::string_literal ||      // "foo"
+        Tok.getKind() == tok::wide_string_literal || // L"foo"
+        Tok.getKind() == tok::char_constant) {       // 'x' and L'x'.
       Result += Lexer::Stringify(PP.getSpelling(Tok));
     } else {
       // Otherwise, just append the token.

Modified: cfe/cfe/trunk/Lex/Pragma.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/cfe/trunk/Lex/Pragma.cpp?rev=38960&r1=38959&r2=38960&view=diff

==============================================================================
--- cfe/cfe/trunk/Lex/Pragma.cpp (original)
+++ cfe/cfe/trunk/Lex/Pragma.cpp Wed Jul 11 11:26:15 2007
@@ -96,7 +96,8 @@
 
   // Read the '"..."'.
   Lex(Tok);
-  if (Tok.getKind() != tok::string_literal)
+  if (Tok.getKind() != tok::string_literal &&
+      Tok.getKind() != tok::wide_string_literal)
     return Diag(PragmaLoc, diag::err__Pragma_malformed);
   
   // Remember the string.

Modified: cfe/cfe/trunk/Lex/Preprocessor.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/cfe/trunk/Lex/Preprocessor.cpp?rev=38960&r1=38959&r2=38960&view=diff

==============================================================================
--- cfe/cfe/trunk/Lex/Preprocessor.cpp (original)
+++ cfe/cfe/trunk/Lex/Preprocessor.cpp Wed Jul 11 11:26:15 2007
@@ -444,7 +444,6 @@
 /// tokens from it instead of the current buffer.
 void Preprocessor::EnterMacro(LexerToken &Tok, MacroArgs *Args) {
   IdentifierInfo *Identifier = Tok.getIdentifierInfo();
-  MacroInfo &MI = *Identifier->getMacroInfo();
   IncludeMacroStack.push_back(IncludeStackInfo(CurLexer, CurDirLookup,
                                                CurMacroExpander));
   CurLexer     = 0;
@@ -1492,7 +1491,8 @@
   Lex(StrTok);
   
   // If the token kind isn't a string, it's a malformed directive.
-  if (StrTok.getKind() != tok::string_literal)
+  if (StrTok.getKind() != tok::string_literal &&
+      StrTok.getKind() != tok::wide_string_literal)
     return Diag(StrTok, diag::err_pp_malformed_ident);
   
   // Verify that there is nothing after the string, other than EOM.

Modified: cfe/cfe/trunk/Parse/ParseExpr.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/cfe/trunk/Parse/ParseExpr.cpp?rev=38960&r1=38959&r2=38960&view=diff

==============================================================================
--- cfe/cfe/trunk/Parse/ParseExpr.cpp (original)
+++ cfe/cfe/trunk/Parse/ParseExpr.cpp Wed Jul 11 11:26:15 2007
@@ -22,6 +22,8 @@
 #include "clang/Parse/Parser.h"
 #include "clang/Basic/Diagnostic.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Config/Alloca.h"
 using namespace llvm;
 using namespace clang;
 
@@ -490,6 +492,7 @@
     // These can be followed by postfix-expr pieces.
     return ParsePostfixExpressionSuffix(Res);
   case tok::string_literal:    // primary-expression: string-literal
+  case tok::wide_string_literal:
     Res = ParseStringLiteralExpression();
     if (Res.isInvalid) return Res;
     // This can be followed by postfix-expr pieces (e.g. "foo"[1]).
@@ -809,24 +812,6 @@
   return ParsePostfixExpressionSuffix(Res);
 }
 
-/// ParseStringLiteralExpression - This handles the various token types that
-/// form string literals, and also handles string concatenation [C99 5.1.1.2,
-/// translation phase #6].
-///
-///       primary-expression: [C99 6.5.1]
-///         string-literal
-Parser::ExprResult Parser::ParseStringLiteralExpression() {
-  assert(isTokenStringLiteral() && "Not a string literal!");
-  ConsumeStringToken();
-  
-  // String concat.  Note that keywords like __func__ and __FUNCTION__ aren't
-  // considered to be strings.
-  while (isTokenStringLiteral())
-    ConsumeStringToken();
-  // TODO: Build AST for string literals.
-  return ExprResult(false);
-}
-
 
 /// ParseParenExpression - This parses the unit that starts with a '(' token,
 /// based on what is allowed by ExprType.  The actual thing parsed is returned
@@ -906,3 +891,223 @@
   
   return Result;
 }
+
+/// HexDigitValue - Return the value of the specified hex digit, or -1 if it's
+/// not valid.
+static int HexDigitValue(char C) {
+  if (C >= '0' && C <= '9') return C-'0';
+  if (C >= 'a' && C <= 'f') return C-'a'+10;
+  if (C >= 'A' && C <= 'F') return C-'A'+10;
+  return -1;
+}
+
+/// ParseStringLiteralExpression - This handles the various token types that
+/// form string literals, and also handles string concatenation [C99 5.1.1.2,
+/// translation phase #6].
+///
+///       primary-expression: [C99 6.5.1]
+///         string-literal
+Parser::ExprResult Parser::ParseStringLiteralExpression() {
+  assert(isTokenStringLiteral() && "Not a string literal!");
+  
+  // String concat.  Note that keywords like __func__ and __FUNCTION__ are not
+  // considered to be strings for concatenation purposes.
+  SmallVector<LexerToken, 4> StringToks;
+  
+  // While we're looking at all of the string portions, remember the max
+  // individual token length, computing a bound on the concatenated string
+  // length, and see whether any piece is a wide-string.  If any of the string
+  // portions is a wide-string literal, the result is also a wide-string literal
+  // [C99 6.4.5p4].
+  unsigned SizeBound = 0, MaxTokenLength = 0;
+  bool AnyWide = false;
+  do {
+    // The string could be shorter than this if it needs cleaning, but this is a
+    // reasonable bound, which is all we need.
+    SizeBound += Tok.getLength()-2;  // -2 for "".
+    
+    // Find maximum string piece length.
+    if (Tok.getLength() > MaxTokenLength) 
+      MaxTokenLength = Tok.getLength();
+    
+    // Remember if we see any wide strings.
+    AnyWide |= Tok.getKind() == tok::wide_string_literal;
+    
+    // Remember the string token.
+    StringToks.push_back(Tok);
+    ConsumeStringToken();
+  } while (isTokenStringLiteral());
+  
+  // Include space for the null terminator.
+  ++SizeBound;
+  
+  // TODO: K&R warning: "traditional C rejects string constant concatenation"
+  
+  // FIXME: Size of wchar_t should not be hardcoded!
+  unsigned wchar_tByteWidth = 4;
+  
+  // The output buffer size needs to be large enough to hold wide characters.
+  // This is a worst-case assumption which basically corresponds to L"" "long".
+  if (AnyWide)
+    SizeBound *= wchar_tByteWidth;
+  
+  // Create a temporary buffer to hold the result string data.  If it is "big",
+  // use malloc, otherwise use alloca.
+  char *ResultBuf;
+  if (SizeBound > 512)
+    ResultBuf = (char*)malloc(SizeBound);
+  else
+    ResultBuf = (char*)alloca(SizeBound);
+  
+  // Likewise, but for each string piece.
+  char *TokenBuf;
+  if (MaxTokenLength > 512)
+    TokenBuf = (char*)malloc(MaxTokenLength);
+  else
+    TokenBuf = (char*)alloca(MaxTokenLength);
+  
+  // Loop over all the strings, getting their spelling, and expanding them to
+  // wide strings as appropriate.
+  char *ResultPtr = ResultBuf;   // Next byte to fill in.
+  
+  for (unsigned i = 0, e = StringToks.size(); i != e; ++i) {
+    const char *ThisTokBuf = TokenBuf;
+    // Get the spelling of the token, which eliminates trigraphs, etc.  We know
+    // that ThisTokBuf points to a buffer that is big enough for the whole token
+    // and 'spelled' tokens can only shrink.
+    unsigned ThisTokLen = PP.getSpelling(StringToks[i], ThisTokBuf);
+    const char *ThisTokEnd = ThisTokBuf+ThisTokLen-1;  // Skip end quote.
+    
+    // TODO: Input character set mapping support.
+    
+    // Skip L marker for wide strings.
+    if (ThisTokBuf[0] == 'L') ++ThisTokBuf;
+    
+    assert(ThisTokBuf[0] == '"' && "Expected quote, lexer broken?");
+    ++ThisTokBuf;
+    
+    while (ThisTokBuf != ThisTokEnd) {
+      // Is this a span of non-escape characters?
+      if (ThisTokBuf[0] != '\\') {
+        const char *InStart = ThisTokBuf;
+        do {
+          ++ThisTokBuf;
+        } while (ThisTokBuf != ThisTokEnd && ThisTokBuf[0] != '\\');
+        
+        // Copy the character span over.
+        unsigned Len = ThisTokBuf-InStart;
+        if (!AnyWide) {
+          memcpy(ResultPtr, InStart, Len);
+          ResultPtr += Len;
+        } else {
+          // Note: our internal rep of wide char tokens is always little-endian.
+          for (; Len; --Len, ++InStart) {
+            *ResultPtr++ = InStart[0];
+            // Add zeros at the end.
+            for (unsigned i = 1, e = wchar_tByteWidth; i != e; ++i)
+              *ResultPtr++ = 0;
+          }
+        }
+        continue;
+      }
+      
+      // Otherwise, this is an escape character.  Skip the '\' char.
+      ++ThisTokBuf;
+      
+      // We know that this character can't be off the end of the buffer, because
+      // that would have been \", which would not have been the end of string.
+      unsigned ResultChar = *ThisTokBuf++;
+      switch (ResultChar) {
+      // These map to themselves.
+      case '\\': case '\'': case '"': case '?': break;
+        
+      // These have fixed mappings.
+      case 'a':
+        // TODO: K&R: the meaning of '\\a' is different in traditional C
+        ResultChar = 7;
+        break;
+      case 'b':
+        ResultChar = 8;
+        break;
+      case 'e':
+        PP.Diag(StringToks[i], diag::ext_nonstandard_escape, "e");
+        ResultChar = 27;
+        break;
+      case 'f':
+        ResultChar = 12;
+        break;
+      case 'n':
+        ResultChar = 10;
+        break;
+      case 'r':
+        ResultChar = 13;
+        break;
+      case 't':
+        ResultChar = 9;
+        break;
+      case 'v':
+        ResultChar = 11;
+        break;
+        
+      //case 'u': case 'U':  // FIXME: UCNs.
+      case 'x': // Hex escape.
+        if (ThisTokBuf == ThisTokEnd ||
+            (ResultChar = HexDigitValue(*ThisTokBuf)) == ~0U) {
+          PP.Diag(StringToks[i], diag::err_hex_escape_no_digits);
+          ResultChar = 0;
+          break;
+        }
+        ++ThisTokBuf; // Consumed one hex digit.
+        
+        assert(0 && "hex escape: unimp!");
+        break;
+      case '0': case '1': case '2': case '3':
+      case '4': case '5': case '6': case '7':
+        // Octal escapes.
+        assert(0 && "octal escape: unimp!");
+        break;
+        
+      // Otherwise, these are not valid escapes.
+      case '(': case '{': case '[': case '%':
+        // GCC accepts these as extensions.  We warn about them as such though.
+        if (!PP.getLangOptions().NoExtensions) {
+          PP.Diag(StringToks[i], diag::ext_nonstandard_escape,
+                  std::string()+(char)ResultChar);
+          break;
+        }
+        // FALL THROUGH.
+      default:
+        if (isgraph(ThisTokBuf[0])) {
+          PP.Diag(StringToks[i], diag::ext_unknown_escape,
+                  std::string()+(char)ResultChar);
+        } else {
+          PP.Diag(StringToks[i], diag::ext_unknown_escape,
+                  "x"+utohexstr(ResultChar));
+        }
+      }
+
+      // Note: our internal rep of wide char tokens is always little-endian.
+      for (unsigned i = 0, e = wchar_tByteWidth; i != e; ++i)
+        *ResultPtr++ = ResultChar >> i*8;
+    }
+  }
+  
+  // Add zero terminator.
+  *ResultPtr = 0;
+  if (AnyWide) {
+    for (unsigned i = 1, e = wchar_tByteWidth; i != e; ++i)
+      *ResultPtr++ = 0;
+  }
+  
+  // Hand this off to the Actions.
+  ExprResult Res = Actions.ParseStringExpr(ResultBuf, ResultPtr-ResultBuf,
+                                           AnyWide,
+                                           &StringToks[0], StringToks.size());
+  
+  // If either buffer was heap allocated, release it now.
+  if (MaxTokenLength > 512) free(TokenBuf);
+  if (SizeBound > 512) free(ResultBuf);
+  
+  return Res;
+}
+

Modified: cfe/cfe/trunk/Parse/ParseStmt.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/cfe/trunk/Parse/ParseStmt.cpp?rev=38960&r1=38959&r2=38960&view=diff

==============================================================================
--- cfe/cfe/trunk/Parse/ParseStmt.cpp (original)
+++ cfe/cfe/trunk/Parse/ParseStmt.cpp Wed Jul 11 11:26:15 2007
@@ -597,7 +597,7 @@
   if (Tok.getKind() == tok::colon) {
     ConsumeToken();
     
-    if (Tok.getKind() == tok::string_literal) {
+    if (isTokenStringLiteral()) {
       // Parse the asm-string list for clobbers.
       while (1) {
         ParseAsmStringLiteral();
@@ -629,7 +629,7 @@
   ConsumeToken();
   
   // 'asm-operands' isn't present?
-  if (Tok.getKind() != tok::string_literal && Tok.getKind() != tok::l_square)
+  if (!isTokenStringLiteral() && Tok.getKind() != tok::l_square)
     return;
   
   while (1) {

Modified: cfe/cfe/trunk/Parse/Parser.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/cfe/trunk/Parse/Parser.cpp?rev=38960&r1=38959&r2=38960&view=diff

==============================================================================
--- cfe/cfe/trunk/Parse/Parser.cpp (original)
+++ cfe/cfe/trunk/Parse/Parser.cpp Wed Jul 11 11:26:15 2007
@@ -152,6 +152,7 @@
       break;
       
     case tok::string_literal:
+    case tok::wide_string_literal:
       ConsumeStringToken();
       break;
     case tok::semi:
@@ -405,7 +406,7 @@
 ///         string-literal
 ///
 void Parser::ParseAsmStringLiteral() {
-  if (Tok.getKind() != tok::string_literal) {
+  if (!isTokenStringLiteral()) {
     Diag(Tok, diag::err_expected_string_literal);
     return;
   }

Modified: cfe/cfe/trunk/Sema/ASTStreamer.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/cfe/trunk/Sema/ASTStreamer.cpp?rev=38960&r1=38959&r2=38960&view=diff

==============================================================================
--- cfe/cfe/trunk/Sema/ASTStreamer.cpp (original)
+++ cfe/cfe/trunk/Sema/ASTStreamer.cpp Wed Jul 11 11:26:15 2007
@@ -19,7 +19,7 @@
 
 /// Interface to the Builder.cpp file.
 ///
-Action *CreateASTBuilderActions(bool FullLocInfo);
+Action *CreateASTBuilderActions(Preprocessor &PP, bool FullLocInfo);
 
 
 namespace {
@@ -27,7 +27,7 @@
     Parser P;
   public:
     ASTStreamer(Preprocessor &PP, unsigned MainFileID, bool FullLocInfo)
-      : P(PP, *CreateASTBuilderActions(FullLocInfo)) {
+      : P(PP, *CreateASTBuilderActions(PP, FullLocInfo)) {
       PP.EnterSourceFile(MainFileID, 0, true);
       
       // Initialize the parser.

Modified: cfe/cfe/trunk/Sema/Sema.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/cfe/trunk/Sema/Sema.cpp?rev=38960&r1=38959&r2=38960&view=diff

==============================================================================
--- cfe/cfe/trunk/Sema/Sema.cpp (original)
+++ cfe/cfe/trunk/Sema/Sema.cpp Wed Jul 11 11:26:15 2007
@@ -18,18 +18,23 @@
 #include "clang/Parse/Scope.h"
 #include "clang/Lex/IdentifierTable.h"
 #include "clang/Lex/LexerToken.h"
-#include "llvm/Support/Visibility.h"
+#include "clang/Lex/Preprocessor.h"
+#include "llvm/Support/Compiler.h"
 using namespace llvm;
 using namespace clang;
 
 /// ASTBuilder
 namespace {
 class VISIBILITY_HIDDEN ASTBuilder : public Action {
+  Preprocessor &PP;
+  
   /// FullLocInfo - If this is true, the ASTBuilder constructs AST Nodes that
   /// capture maximal location information for each source-language construct.
   bool FullLocInfo;
 public:
-  ASTBuilder(bool fullLocInfo) : FullLocInfo(fullLocInfo) {}
+  ASTBuilder(Preprocessor &pp, bool fullLocInfo)
+    : PP(pp), FullLocInfo(fullLocInfo) {}
+  
   //===--------------------------------------------------------------------===//
   // Symbol table tracking callbacks.
   //
@@ -47,6 +52,9 @@
   virtual ExprResult ParseFloatingConstant(const LexerToken &Tok);
   virtual ExprResult ParseParenExpr(SourceLocation L, SourceLocation R,
                                     ExprTy *Val);
+  virtual ExprResult ParseStringExpr(const char *StrData, unsigned StrLen,
+                                     bool isWide,
+                                     const LexerToken *Toks, unsigned NumToks);
   
   // Binary/Unary Operators.  'Tok' is the token for the operator.
   virtual ExprResult ParseUnaryOp(const LexerToken &Tok, ExprTy *Input);
@@ -166,6 +174,26 @@
   return new ParenExpr(L, R, (Expr*)Val);
 }
 
+/// ParseStringExpr - This accepts a string after semantic analysis. This string
+/// may be the result of string concatenation ([C99 5.1.1.2, translation phase
+/// #6]), so it may come from multiple tokens.
+/// 
+Action::ExprResult ASTBuilder::
+ParseStringExpr(const char *StrData, unsigned StrLen, bool isWide,
+                const LexerToken *Toks, unsigned NumToks) {
+  assert(NumToks && "Must have at least one string!");
+  
+  if (!FullLocInfo)
+    return new StringExpr(StrData, StrLen, isWide);
+  else {
+    SmallVector<SourceLocation, 4> Locs;
+    for (unsigned i = 0; i != NumToks; ++i)
+      Locs.push_back(Toks[i].getLocation());
+    return new StringExprLOC(StrData, StrLen, isWide, &Locs[0], Locs.size());
+  }
+}
+
+
 // Unary Operators.  'Tok' is the token for the operator.
 Action::ExprResult ASTBuilder::ParseUnaryOp(const LexerToken &Tok,
                                             ExprTy *Input) {
@@ -326,8 +354,8 @@
 
 /// Interface to the Builder.cpp file.
 ///
-Action *CreateASTBuilderActions(bool FullLocInfo) {
-  return new ASTBuilder(FullLocInfo);
+Action *CreateASTBuilderActions(Preprocessor &PP, bool FullLocInfo) {
+  return new ASTBuilder(PP, FullLocInfo);
 }
 
 

Modified: cfe/cfe/trunk/clang.xcodeproj/project.pbxproj
URL: http://llvm.org/viewvc/llvm-project/cfe/cfe/trunk/clang.xcodeproj/project.pbxproj?rev=38960&r1=38959&r2=38960&view=diff

==============================================================================
--- cfe/cfe/trunk/clang.xcodeproj/project.pbxproj (original)
+++ cfe/cfe/trunk/clang.xcodeproj/project.pbxproj Wed Jul 11 11:26:15 2007
@@ -424,7 +424,7 @@
 		1DEB923208733DC60010E9CD /* Debug */ = {
 			isa = XCBuildConfiguration;
 			buildSettings = {
-				ARCHS = ppc;
+				ARCHS = i386;
 				COPY_PHASE_STRIP = NO;
 				GCC_CW_ASM_SYNTAX = NO;
 				GCC_DYNAMIC_NO_PIC = NO;
@@ -460,7 +460,7 @@
 		1DEB923308733DC60010E9CD /* Release */ = {
 			isa = XCBuildConfiguration;
 			buildSettings = {
-				ARCHS = ppc;
+				ARCHS = i386;
 				GCC_CW_ASM_SYNTAX = NO;
 				GCC_ENABLE_CPP_EXCEPTIONS = NO;
 				GCC_ENABLE_CPP_RTTI = NO;

Modified: cfe/cfe/trunk/include/clang/AST/Expr.h
URL: http://llvm.org/viewvc/llvm-project/cfe/cfe/trunk/include/clang/AST/Expr.h?rev=38960&r1=38959&r2=38960&view=diff

==============================================================================
--- cfe/cfe/trunk/include/clang/AST/Expr.h (original)
+++ cfe/cfe/trunk/include/clang/AST/Expr.h Wed Jul 11 11:26:15 2007
@@ -15,7 +15,7 @@
 #define LLVM_CLANG_AST_EXPR_H
 
 #include "clang/Basic/SourceLocation.h"
-#include <cassert>
+#include "llvm/ADT/SmallVector.h"
 
 namespace llvm {
 namespace clang {
@@ -64,6 +64,26 @@
   virtual void dump_impl() const;
 };
 
+class StringExpr : public Expr {
+  const char *StrData;
+  unsigned ByteLength;
+  bool isWide;
+public:
+  StringExpr(const char *strData, unsigned byteLength, bool Wide);
+  virtual ~StringExpr();
+  virtual void dump_impl() const;
+};
+
+class StringExprLOC : public StringExpr {
+  // Locations for the string tokens before string concatenation.
+  SmallVector<SourceLocation, 4> Locs;
+public:
+  StringExprLOC(const char *StrData, unsigned ByteLength, bool isWide,
+                SourceLocation *L, unsigned NumLocs)
+    : StringExpr(StrData, ByteLength, isWide), Locs(L, L+NumLocs) {
+  }
+};
+
 /// ParenExpr - This represents a parethesized expression, e.g. "(1)".  This
 /// AST node is only formed if full location information is requested.
 class ParenExpr : public Expr {

Modified: cfe/cfe/trunk/include/clang/Basic/DiagnosticKinds.def
URL: http://llvm.org/viewvc/llvm-project/cfe/cfe/trunk/include/clang/Basic/DiagnosticKinds.def?rev=38960&r1=38959&r2=38960&view=diff

==============================================================================
--- cfe/cfe/trunk/include/clang/Basic/DiagnosticKinds.def (original)
+++ cfe/cfe/trunk/include/clang/Basic/DiagnosticKinds.def Wed Jul 11 11:26:15 2007
@@ -342,6 +342,18 @@
 DIAG(err_matching, ERROR,
      "to match this '%s'")
 
+//===----------------------------------------------------------------------===//
+// Semantic Analysis
+//===----------------------------------------------------------------------===//
+
+DIAG(ext_nonstandard_escape, EXTENSION,
+     "use of non-standard escape character '\\%s'")
+DIAG(ext_unknown_escape, EXTENSION,
+     "unknown escape sequence '\\%s'")
+
+DIAG(err_hex_escape_no_digits, ERROR,
+     "\\x used with no following hex digits")
+
 DIAG(err_typename_requires_specqual, ERROR,
      "type name requires a specifier or qualifier")
 DIAG(err_typename_invalid_storageclass, ERROR,

Modified: cfe/cfe/trunk/include/clang/Basic/TokenKinds.def
URL: http://llvm.org/viewvc/llvm-project/cfe/cfe/trunk/include/clang/Basic/TokenKinds.def?rev=38960&r1=38959&r2=38960&view=diff

==============================================================================
--- cfe/cfe/trunk/include/clang/Basic/TokenKinds.def (original)
+++ cfe/cfe/trunk/include/clang/Basic/TokenKinds.def Wed Jul 11 11:26:15 2007
@@ -42,7 +42,8 @@
 TOK(char_constant)       // 'a'   L'b'
 
 // C99 6.4.5: String Literals.
-TOK(string_literal)      // "foo"  L"foo"
+TOK(string_literal)      // "foo"
+TOK(wide_string_literal) // L"foo"
 TOK(angle_string_literal)// <foo>
 
 // C99 6.4.6: Punctuators.

Modified: cfe/cfe/trunk/include/clang/Lex/Lexer.h
URL: http://llvm.org/viewvc/llvm-project/cfe/cfe/trunk/include/clang/Lex/Lexer.h?rev=38960&r1=38959&r2=38960&view=diff

==============================================================================
--- cfe/cfe/trunk/include/clang/Lex/Lexer.h (original)
+++ cfe/cfe/trunk/include/clang/Lex/Lexer.h Wed Jul 11 11:26:15 2007
@@ -329,7 +329,7 @@
   // Helper functions to lex the remainder of a token of the specific type.
   void LexIdentifier         (LexerToken &Result, const char *CurPtr);
   void LexNumericConstant    (LexerToken &Result, const char *CurPtr);
-  void LexStringLiteral      (LexerToken &Result, const char *CurPtr);
+  void LexStringLiteral      (LexerToken &Result, const char *CurPtr,bool Wide);
   void LexAngledStringLiteral(LexerToken &Result, const char *CurPtr);
   void LexCharConstant       (LexerToken &Result, const char *CurPtr);
   bool LexEndOfFile          (LexerToken &Result, const char *CurPtr);

Modified: cfe/cfe/trunk/include/clang/Parse/Action.h
URL: http://llvm.org/viewvc/llvm-project/cfe/cfe/trunk/include/clang/Parse/Action.h?rev=38960&r1=38959&r2=38960&view=diff

==============================================================================
--- cfe/cfe/trunk/include/clang/Parse/Action.h (original)
+++ cfe/cfe/trunk/include/clang/Parse/Action.h Wed Jul 11 11:26:15 2007
@@ -94,11 +94,19 @@
   virtual ExprResult ParseSimplePrimaryExpr(const LexerToken &Tok) { return 0; }
   virtual ExprResult ParseIntegerConstant(const LexerToken &Tok) { return 0; }
   virtual ExprResult ParseFloatingConstant(const LexerToken &Tok) { return 0; }
-
   virtual ExprResult ParseParenExpr(SourceLocation L, SourceLocation R,
                                     ExprTy *Val) {
     return Val;  // Default impl returns operand.
   }
+  
+  /// ParseStringExpr - The (null terminated) string data is specified with
+  /// StrData+StrLen.  isWide is true if this is a wide string. The Toks/NumToks
+  /// array exposes the input tokens to provide location information.
+  virtual ExprResult ParseStringExpr(const char *StrData, unsigned StrLen,
+                                     bool isWide,
+                                     const LexerToken *Toks, unsigned NumToks) {
+    return 0;
+  }
 
   // Postfix Expressions.
   virtual ExprResult ParsePostfixUnaryOp(const LexerToken &Tok, ExprTy *Input) {

Modified: cfe/cfe/trunk/include/clang/Parse/Parser.h
URL: http://llvm.org/viewvc/llvm-project/cfe/cfe/trunk/include/clang/Parse/Parser.h?rev=38960&r1=38959&r2=38960&view=diff

==============================================================================
--- cfe/cfe/trunk/include/clang/Parse/Parser.h (original)
+++ cfe/cfe/trunk/include/clang/Parse/Parser.h Wed Jul 11 11:26:15 2007
@@ -89,7 +89,8 @@
   /// isTokenStringLiteral - True if this token is a string-literal.
   ///
   bool isTokenStringLiteral() const {
-    return Tok.getKind() == tok::string_literal;
+    return Tok.getKind() == tok::string_literal ||
+           Tok.getKind() == tok::wide_string_literal;
   }
   
   /// ConsumeToken - Consume the current 'peek token' and lex the next one.