[cfe-commits] r38960 - in /cfe/cfe/trunk: AST/ASTStreamer.cpp AST/Expr.cpp AST/Sema.cpp Lex/Lexer.cpp Lex/MacroExpander.cpp Lex/Pragma.cpp Lex/Preprocessor.cpp Parse/ParseExpr.cpp Parse/ParseStmt.cpp Parse/Parser.cpp Sema/ASTStreamer.cpp Sema/Sema.cpp clang.xcodeproj/project.pbxproj include/clang/AST/Expr.h include/clang/Basic/DiagnosticKinds.def include/clang/Basic/TokenKinds.def include/clang/Lex/Lexer.h include/clang/Parse/Action.h include/clang/Parse/Parser.h
sabre at cs.uiuc.edu
sabre at cs.uiuc.edu
Wed Jul 11 09:26:15 PDT 2007
Author: sabre
Date: Wed Jul 11 11:26:15 2007
New Revision: 38960
URL: http://llvm.org/viewvc/llvm-project?rev=38960&view=rev
Log:
Initial support for semantic analysis and AST building for StringExpr nodes.
Modified:
cfe/cfe/trunk/AST/ASTStreamer.cpp
cfe/cfe/trunk/AST/Expr.cpp
cfe/cfe/trunk/AST/Sema.cpp
cfe/cfe/trunk/Lex/Lexer.cpp
cfe/cfe/trunk/Lex/MacroExpander.cpp
cfe/cfe/trunk/Lex/Pragma.cpp
cfe/cfe/trunk/Lex/Preprocessor.cpp
cfe/cfe/trunk/Parse/ParseExpr.cpp
cfe/cfe/trunk/Parse/ParseStmt.cpp
cfe/cfe/trunk/Parse/Parser.cpp
cfe/cfe/trunk/Sema/ASTStreamer.cpp
cfe/cfe/trunk/Sema/Sema.cpp
cfe/cfe/trunk/clang.xcodeproj/project.pbxproj
cfe/cfe/trunk/include/clang/AST/Expr.h
cfe/cfe/trunk/include/clang/Basic/DiagnosticKinds.def
cfe/cfe/trunk/include/clang/Basic/TokenKinds.def
cfe/cfe/trunk/include/clang/Lex/Lexer.h
cfe/cfe/trunk/include/clang/Parse/Action.h
cfe/cfe/trunk/include/clang/Parse/Parser.h
Modified: cfe/cfe/trunk/AST/ASTStreamer.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/cfe/trunk/AST/ASTStreamer.cpp?rev=38960&r1=38959&r2=38960&view=diff
==============================================================================
--- cfe/cfe/trunk/AST/ASTStreamer.cpp (original)
+++ cfe/cfe/trunk/AST/ASTStreamer.cpp Wed Jul 11 11:26:15 2007
@@ -19,7 +19,7 @@
/// Interface to the Builder.cpp file.
///
-Action *CreateASTBuilderActions(bool FullLocInfo);
+Action *CreateASTBuilderActions(Preprocessor &PP, bool FullLocInfo);
namespace {
@@ -27,7 +27,7 @@
Parser P;
public:
ASTStreamer(Preprocessor &PP, unsigned MainFileID, bool FullLocInfo)
- : P(PP, *CreateASTBuilderActions(FullLocInfo)) {
+ : P(PP, *CreateASTBuilderActions(PP, FullLocInfo)) {
PP.EnterSourceFile(MainFileID, 0, true);
// Initialize the parser.
Modified: cfe/cfe/trunk/AST/Expr.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/cfe/trunk/AST/Expr.cpp?rev=38960&r1=38959&r2=38960&view=diff
==============================================================================
--- cfe/cfe/trunk/AST/Expr.cpp (original)
+++ cfe/cfe/trunk/AST/Expr.cpp Wed Jul 11 11:26:15 2007
@@ -43,6 +43,28 @@
std::cerr << "1.0";
}
+
+
+StringExpr::StringExpr(const char *strData, unsigned byteLength, bool Wide) {
+ // OPTIMIZE: could allocate this appended to the StringExpr.
+ char *AStrData = new char[byteLength];
+ memcpy(AStrData, strData, byteLength);
+ StrData = AStrData;
+ ByteLength = byteLength;
+ isWide = Wide;
+}
+
+StringExpr::~StringExpr() {
+ delete[] StrData;
+}
+
+void StringExpr::dump_impl() const {
+ if (isWide) std::cerr << 'L';
+ std::cerr << '"' << StrData << '"';
+}
+
+
+
void ParenExpr::dump_impl() const {
std::cerr << "'('";
Val->dump();
Modified: cfe/cfe/trunk/AST/Sema.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/cfe/trunk/AST/Sema.cpp?rev=38960&r1=38959&r2=38960&view=diff
==============================================================================
--- cfe/cfe/trunk/AST/Sema.cpp (original)
+++ cfe/cfe/trunk/AST/Sema.cpp Wed Jul 11 11:26:15 2007
@@ -18,18 +18,23 @@
#include "clang/Parse/Scope.h"
#include "clang/Lex/IdentifierTable.h"
#include "clang/Lex/LexerToken.h"
-#include "llvm/Support/Visibility.h"
+#include "clang/Lex/Preprocessor.h"
+#include "llvm/Support/Compiler.h"
using namespace llvm;
using namespace clang;
/// ASTBuilder
namespace {
class VISIBILITY_HIDDEN ASTBuilder : public Action {
+ Preprocessor &PP;
+
/// FullLocInfo - If this is true, the ASTBuilder constructs AST Nodes that
/// capture maximal location information for each source-language construct.
bool FullLocInfo;
public:
- ASTBuilder(bool fullLocInfo) : FullLocInfo(fullLocInfo) {}
+ ASTBuilder(Preprocessor &pp, bool fullLocInfo)
+ : PP(pp), FullLocInfo(fullLocInfo) {}
+
//===--------------------------------------------------------------------===//
// Symbol table tracking callbacks.
//
@@ -47,6 +52,9 @@
virtual ExprResult ParseFloatingConstant(const LexerToken &Tok);
virtual ExprResult ParseParenExpr(SourceLocation L, SourceLocation R,
ExprTy *Val);
+ virtual ExprResult ParseStringExpr(const char *StrData, unsigned StrLen,
+ bool isWide,
+ const LexerToken *Toks, unsigned NumToks);
// Binary/Unary Operators. 'Tok' is the token for the operator.
virtual ExprResult ParseUnaryOp(const LexerToken &Tok, ExprTy *Input);
@@ -166,6 +174,26 @@
return new ParenExpr(L, R, (Expr*)Val);
}
+/// ParseStringExpr - This accepts a string after semantic analysis. This string
+/// may be the result of string concatenation ([C99 5.1.1.2, translation phase
+/// #6]), so it may come from multiple tokens.
+///
+Action::ExprResult ASTBuilder::
+ParseStringExpr(const char *StrData, unsigned StrLen, bool isWide,
+ const LexerToken *Toks, unsigned NumToks) {
+ assert(NumToks && "Must have at least one string!");
+
+ if (!FullLocInfo)
+ return new StringExpr(StrData, StrLen, isWide);
+ else {
+ SmallVector<SourceLocation, 4> Locs;
+ for (unsigned i = 0; i != NumToks; ++i)
+ Locs.push_back(Toks[i].getLocation());
+ return new StringExprLOC(StrData, StrLen, isWide, &Locs[0], Locs.size());
+ }
+}
+
+
// Unary Operators. 'Tok' is the token for the operator.
Action::ExprResult ASTBuilder::ParseUnaryOp(const LexerToken &Tok,
ExprTy *Input) {
@@ -326,8 +354,8 @@
/// Interface to the Builder.cpp file.
///
-Action *CreateASTBuilderActions(bool FullLocInfo) {
- return new ASTBuilder(FullLocInfo);
+Action *CreateASTBuilderActions(Preprocessor &PP, bool FullLocInfo) {
+ return new ASTBuilder(PP, FullLocInfo);
}
Modified: cfe/cfe/trunk/Lex/Lexer.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/cfe/trunk/Lex/Lexer.cpp?rev=38960&r1=38959&r2=38960&view=diff
==============================================================================
--- cfe/cfe/trunk/Lex/Lexer.cpp (original)
+++ cfe/cfe/trunk/Lex/Lexer.cpp Wed Jul 11 11:26:15 2007
@@ -444,7 +444,7 @@
/// LexStringLiteral - Lex the remainder of a string literal, after having lexed
/// either " or L".
-void Lexer::LexStringLiteral(LexerToken &Result, const char *CurPtr) {
+void Lexer::LexStringLiteral(LexerToken &Result, const char *CurPtr, bool Wide){
const char *NulCharacter = 0; // Does this string contain the \0 character?
char C = getAndAdvanceChar(CurPtr, Result);
@@ -468,7 +468,7 @@
// If a nul character existed in the string, warn about it.
if (NulCharacter) Diag(NulCharacter, diag::null_in_string);
- Result.SetKind(tok::string_literal);
+ Result.SetKind(Wide ? tok::wide_string_literal : tok::string_literal);
// Update the location of the token as well as the BufferPtr instance var.
FormTokenWithChars(Result, CurPtr);
@@ -1104,7 +1104,8 @@
// Wide string literal.
if (Char == '"')
- return LexStringLiteral(Result, ConsumeChar(CurPtr, SizeTmp, Result));
+ return LexStringLiteral(Result, ConsumeChar(CurPtr, SizeTmp, Result),
+ true);
// Wide character constant.
if (Char == '\'')
@@ -1143,7 +1144,7 @@
case '"':
// Notify MIOpt that we read a non-whitespace/non-comment token.
MIOpt.ReadToken();
- return LexStringLiteral(Result, CurPtr);
+ return LexStringLiteral(Result, CurPtr, false);
// C99 6.4.6: Punctuators.
case '?':
Modified: cfe/cfe/trunk/Lex/MacroExpander.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/cfe/trunk/Lex/MacroExpander.cpp?rev=38960&r1=38959&r2=38960&view=diff
==============================================================================
--- cfe/cfe/trunk/Lex/MacroExpander.cpp (original)
+++ cfe/cfe/trunk/Lex/MacroExpander.cpp Wed Jul 11 11:26:15 2007
@@ -161,8 +161,9 @@
// If this is a string or character constant, escape the token as specified
// by 6.10.3.2p2.
- if (Tok.getKind() == tok::string_literal || // "foo" and L"foo".
- Tok.getKind() == tok::char_constant) { // 'x' and L'x'.
+ if (Tok.getKind() == tok::string_literal || // "foo"
+ Tok.getKind() == tok::wide_string_literal || // L"foo"
+ Tok.getKind() == tok::char_constant) { // 'x' and L'x'.
Result += Lexer::Stringify(PP.getSpelling(Tok));
} else {
// Otherwise, just append the token.
Modified: cfe/cfe/trunk/Lex/Pragma.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/cfe/trunk/Lex/Pragma.cpp?rev=38960&r1=38959&r2=38960&view=diff
==============================================================================
--- cfe/cfe/trunk/Lex/Pragma.cpp (original)
+++ cfe/cfe/trunk/Lex/Pragma.cpp Wed Jul 11 11:26:15 2007
@@ -96,7 +96,8 @@
// Read the '"..."'.
Lex(Tok);
- if (Tok.getKind() != tok::string_literal)
+ if (Tok.getKind() != tok::string_literal &&
+ Tok.getKind() != tok::wide_string_literal)
return Diag(PragmaLoc, diag::err__Pragma_malformed);
// Remember the string.
Modified: cfe/cfe/trunk/Lex/Preprocessor.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/cfe/trunk/Lex/Preprocessor.cpp?rev=38960&r1=38959&r2=38960&view=diff
==============================================================================
--- cfe/cfe/trunk/Lex/Preprocessor.cpp (original)
+++ cfe/cfe/trunk/Lex/Preprocessor.cpp Wed Jul 11 11:26:15 2007
@@ -444,7 +444,6 @@
/// tokens from it instead of the current buffer.
void Preprocessor::EnterMacro(LexerToken &Tok, MacroArgs *Args) {
IdentifierInfo *Identifier = Tok.getIdentifierInfo();
- MacroInfo &MI = *Identifier->getMacroInfo();
IncludeMacroStack.push_back(IncludeStackInfo(CurLexer, CurDirLookup,
CurMacroExpander));
CurLexer = 0;
@@ -1492,7 +1491,8 @@
Lex(StrTok);
// If the token kind isn't a string, it's a malformed directive.
- if (StrTok.getKind() != tok::string_literal)
+ if (StrTok.getKind() != tok::string_literal &&
+ StrTok.getKind() != tok::wide_string_literal)
return Diag(StrTok, diag::err_pp_malformed_ident);
// Verify that there is nothing after the string, other than EOM.
Modified: cfe/cfe/trunk/Parse/ParseExpr.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/cfe/trunk/Parse/ParseExpr.cpp?rev=38960&r1=38959&r2=38960&view=diff
==============================================================================
--- cfe/cfe/trunk/Parse/ParseExpr.cpp (original)
+++ cfe/cfe/trunk/Parse/ParseExpr.cpp Wed Jul 11 11:26:15 2007
@@ -22,6 +22,8 @@
#include "clang/Parse/Parser.h"
#include "clang/Basic/Diagnostic.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Config/Alloca.h"
using namespace llvm;
using namespace clang;
@@ -490,6 +492,7 @@
// These can be followed by postfix-expr pieces.
return ParsePostfixExpressionSuffix(Res);
case tok::string_literal: // primary-expression: string-literal
+ case tok::wide_string_literal:
Res = ParseStringLiteralExpression();
if (Res.isInvalid) return Res;
// This can be followed by postfix-expr pieces (e.g. "foo"[1]).
@@ -809,24 +812,6 @@
return ParsePostfixExpressionSuffix(Res);
}
-/// ParseStringLiteralExpression - This handles the various token types that
-/// form string literals, and also handles string concatenation [C99 5.1.1.2,
-/// translation phase #6].
-///
-/// primary-expression: [C99 6.5.1]
-/// string-literal
-Parser::ExprResult Parser::ParseStringLiteralExpression() {
- assert(isTokenStringLiteral() && "Not a string literal!");
- ConsumeStringToken();
-
- // String concat. Note that keywords like __func__ and __FUNCTION__ aren't
- // considered to be strings.
- while (isTokenStringLiteral())
- ConsumeStringToken();
- // TODO: Build AST for string literals.
- return ExprResult(false);
-}
-
/// ParseParenExpression - This parses the unit that starts with a '(' token,
/// based on what is allowed by ExprType. The actual thing parsed is returned
@@ -906,3 +891,223 @@
return Result;
}
+
+/// HexDigitValue - Return the value of the specified hex digit, or -1 if it's
+/// not valid.
+static int HexDigitValue(char C) {
+ if (C >= '0' && C <= '9') return C-'0';
+ if (C >= 'a' && C <= 'f') return C-'a'+10;
+ if (C >= 'A' && C <= 'F') return C-'A'+10;
+ return -1;
+}
+
+/// ParseStringLiteralExpression - This handles the various token types that
+/// form string literals, and also handles string concatenation [C99 5.1.1.2,
+/// translation phase #6].
+///
+/// primary-expression: [C99 6.5.1]
+/// string-literal
+Parser::ExprResult Parser::ParseStringLiteralExpression() {
+ assert(isTokenStringLiteral() && "Not a string literal!");
+
+ // String concat. Note that keywords like __func__ and __FUNCTION__ are not
+ // considered to be strings for concatenation purposes.
+ SmallVector<LexerToken, 4> StringToks;
+
+ // While we're looking at all of the string portions, remember the max
+ // individual token length, computing a bound on the concatenated string
+ // length, and see whether any piece is a wide-string. If any of the string
+ // portions is a wide-string literal, the result is also a wide-string literal
+ // [C99 6.4.5p4].
+ unsigned SizeBound = 0, MaxTokenLength = 0;
+ bool AnyWide = false;
+ do {
+ // The string could be shorter than this if it needs cleaning, but this is a
+ // reasonable bound, which is all we need.
+ SizeBound += Tok.getLength()-2; // -2 for "".
+
+ // Find maximum string piece length.
+ if (Tok.getLength() > MaxTokenLength)
+ MaxTokenLength = Tok.getLength();
+
+ // Remember if we see any wide strings.
+ AnyWide |= Tok.getKind() == tok::wide_string_literal;
+
+ // Remember the string token.
+ StringToks.push_back(Tok);
+ ConsumeStringToken();
+ } while (isTokenStringLiteral());
+
+ // Include space for the null terminator.
+ ++SizeBound;
+
+ // TODO: K&R warning: "traditional C rejects string constant concatenation"
+
+ // FIXME: Size of wchar_t should not be hardcoded!
+ unsigned wchar_tByteWidth = 4;
+
+ // The output buffer size needs to be large enough to hold wide characters.
+ // This is a worst-case assumption which basically corresponds to L"" "long".
+ if (AnyWide)
+ SizeBound *= wchar_tByteWidth;
+
+ // Create a temporary buffer to hold the result string data. If it is "big",
+ // use malloc, otherwise use alloca.
+ char *ResultBuf;
+ if (SizeBound > 512)
+ ResultBuf = (char*)malloc(SizeBound);
+ else
+ ResultBuf = (char*)alloca(SizeBound);
+
+ // Likewise, but for each string piece.
+ char *TokenBuf;
+ if (MaxTokenLength > 512)
+ TokenBuf = (char*)malloc(MaxTokenLength);
+ else
+ TokenBuf = (char*)alloca(MaxTokenLength);
+
+ // Loop over all the strings, getting their spelling, and expanding them to
+ // wide strings as appropriate.
+ char *ResultPtr = ResultBuf; // Next byte to fill in.
+
+ for (unsigned i = 0, e = StringToks.size(); i != e; ++i) {
+ const char *ThisTokBuf = TokenBuf;
+ // Get the spelling of the token, which eliminates trigraphs, etc. We know
+ // that ThisTokBuf points to a buffer that is big enough for the whole token
+ // and 'spelled' tokens can only shrink.
+ unsigned ThisTokLen = PP.getSpelling(StringToks[i], ThisTokBuf);
+ const char *ThisTokEnd = ThisTokBuf+ThisTokLen-1; // Skip end quote.
+
+ // TODO: Input character set mapping support.
+
+ // Skip L marker for wide strings.
+ if (ThisTokBuf[0] == 'L') ++ThisTokBuf;
+
+ assert(ThisTokBuf[0] == '"' && "Expected quote, lexer broken?");
+ ++ThisTokBuf;
+
+ while (ThisTokBuf != ThisTokEnd) {
+ // Is this a span of non-escape characters?
+ if (ThisTokBuf[0] != '\\') {
+ const char *InStart = ThisTokBuf;
+ do {
+ ++ThisTokBuf;
+ } while (ThisTokBuf != ThisTokEnd && ThisTokBuf[0] != '\\');
+
+ // Copy the character span over.
+ unsigned Len = ThisTokBuf-InStart;
+ if (!AnyWide) {
+ memcpy(ResultPtr, InStart, Len);
+ ResultPtr += Len;
+ } else {
+ // Note: our internal rep of wide char tokens is always little-endian.
+ for (; Len; --Len, ++InStart) {
+ *ResultPtr++ = InStart[0];
+ // Add zeros at the end.
+ for (unsigned i = 1, e = wchar_tByteWidth; i != e; ++i)
+ *ResultPtr++ = 0;
+ }
+ }
+ continue;
+ }
+
+ // Otherwise, this is an escape character. Skip the '\' char.
+ ++ThisTokBuf;
+
+ // We know that this character can't be off the end of the buffer, because
+ // that would have been \", which would not have been the end of string.
+ unsigned ResultChar = *ThisTokBuf++;
+ switch (ResultChar) {
+ // These map to themselves.
+ case '\\': case '\'': case '"': case '?': break;
+
+ // These have fixed mappings.
+ case 'a':
+ // TODO: K&R: the meaning of '\\a' is different in traditional C
+ ResultChar = 7;
+ break;
+ case 'b':
+ ResultChar = 8;
+ break;
+ case 'e':
+ PP.Diag(StringToks[i], diag::ext_nonstandard_escape, "e");
+ ResultChar = 27;
+ break;
+ case 'f':
+ ResultChar = 12;
+ break;
+ case 'n':
+ ResultChar = 10;
+ break;
+ case 'r':
+ ResultChar = 13;
+ break;
+ case 't':
+ ResultChar = 9;
+ break;
+ case 'v':
+ ResultChar = 11;
+ break;
+
+ //case 'u': case 'U': // FIXME: UCNs.
+ case 'x': // Hex escape.
+ if (ThisTokBuf == ThisTokEnd ||
+ (ResultChar = HexDigitValue(*ThisTokBuf)) == ~0U) {
+ PP.Diag(StringToks[i], diag::err_hex_escape_no_digits);
+ ResultChar = 0;
+ break;
+ }
+ ++ThisTokBuf; // Consumed one hex digit.
+
+ assert(0 && "hex escape: unimp!");
+ break;
+ case '0': case '1': case '2': case '3':
+ case '4': case '5': case '6': case '7':
+ // Octal escapes.
+ assert(0 && "octal escape: unimp!");
+ break;
+
+ // Otherwise, these are not valid escapes.
+ case '(': case '{': case '[': case '%':
+ // GCC accepts these as extensions. We warn about them as such though.
+ if (!PP.getLangOptions().NoExtensions) {
+ PP.Diag(StringToks[i], diag::ext_nonstandard_escape,
+ std::string()+(char)ResultChar);
+ break;
+ }
+ // FALL THROUGH.
+ default:
+ if (isgraph(ThisTokBuf[0])) {
+ PP.Diag(StringToks[i], diag::ext_unknown_escape,
+ std::string()+(char)ResultChar);
+ } else {
+ PP.Diag(StringToks[i], diag::ext_unknown_escape,
+ "x"+utohexstr(ResultChar));
+ }
+ }
+
+ // Note: our internal rep of wide char tokens is always little-endian.
+ for (unsigned i = 0, e = wchar_tByteWidth; i != e; ++i)
+ *ResultPtr++ = ResultChar >> i*8;
+ }
+ }
+
+ // Add zero terminator.
+ *ResultPtr = 0;
+ if (AnyWide) {
+ for (unsigned i = 1, e = wchar_tByteWidth; i != e; ++i)
+ *ResultPtr++ = 0;
+ }
+
+ // Hand this off to the Actions.
+ ExprResult Res = Actions.ParseStringExpr(ResultBuf, ResultPtr-ResultBuf,
+ AnyWide,
+ &StringToks[0], StringToks.size());
+
+ // If either buffer was heap allocated, release it now.
+ if (MaxTokenLength > 512) free(TokenBuf);
+ if (SizeBound > 512) free(ResultBuf);
+
+ return Res;
+}
+
Modified: cfe/cfe/trunk/Parse/ParseStmt.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/cfe/trunk/Parse/ParseStmt.cpp?rev=38960&r1=38959&r2=38960&view=diff
==============================================================================
--- cfe/cfe/trunk/Parse/ParseStmt.cpp (original)
+++ cfe/cfe/trunk/Parse/ParseStmt.cpp Wed Jul 11 11:26:15 2007
@@ -597,7 +597,7 @@
if (Tok.getKind() == tok::colon) {
ConsumeToken();
- if (Tok.getKind() == tok::string_literal) {
+ if (isTokenStringLiteral()) {
// Parse the asm-string list for clobbers.
while (1) {
ParseAsmStringLiteral();
@@ -629,7 +629,7 @@
ConsumeToken();
// 'asm-operands' isn't present?
- if (Tok.getKind() != tok::string_literal && Tok.getKind() != tok::l_square)
+ if (!isTokenStringLiteral() && Tok.getKind() != tok::l_square)
return;
while (1) {
Modified: cfe/cfe/trunk/Parse/Parser.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/cfe/trunk/Parse/Parser.cpp?rev=38960&r1=38959&r2=38960&view=diff
==============================================================================
--- cfe/cfe/trunk/Parse/Parser.cpp (original)
+++ cfe/cfe/trunk/Parse/Parser.cpp Wed Jul 11 11:26:15 2007
@@ -152,6 +152,7 @@
break;
case tok::string_literal:
+ case tok::wide_string_literal:
ConsumeStringToken();
break;
case tok::semi:
@@ -405,7 +406,7 @@
/// string-literal
///
void Parser::ParseAsmStringLiteral() {
- if (Tok.getKind() != tok::string_literal) {
+ if (!isTokenStringLiteral()) {
Diag(Tok, diag::err_expected_string_literal);
return;
}
Modified: cfe/cfe/trunk/Sema/ASTStreamer.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/cfe/trunk/Sema/ASTStreamer.cpp?rev=38960&r1=38959&r2=38960&view=diff
==============================================================================
--- cfe/cfe/trunk/Sema/ASTStreamer.cpp (original)
+++ cfe/cfe/trunk/Sema/ASTStreamer.cpp Wed Jul 11 11:26:15 2007
@@ -19,7 +19,7 @@
/// Interface to the Builder.cpp file.
///
-Action *CreateASTBuilderActions(bool FullLocInfo);
+Action *CreateASTBuilderActions(Preprocessor &PP, bool FullLocInfo);
namespace {
@@ -27,7 +27,7 @@
Parser P;
public:
ASTStreamer(Preprocessor &PP, unsigned MainFileID, bool FullLocInfo)
- : P(PP, *CreateASTBuilderActions(FullLocInfo)) {
+ : P(PP, *CreateASTBuilderActions(PP, FullLocInfo)) {
PP.EnterSourceFile(MainFileID, 0, true);
// Initialize the parser.
Modified: cfe/cfe/trunk/Sema/Sema.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/cfe/trunk/Sema/Sema.cpp?rev=38960&r1=38959&r2=38960&view=diff
==============================================================================
--- cfe/cfe/trunk/Sema/Sema.cpp (original)
+++ cfe/cfe/trunk/Sema/Sema.cpp Wed Jul 11 11:26:15 2007
@@ -18,18 +18,23 @@
#include "clang/Parse/Scope.h"
#include "clang/Lex/IdentifierTable.h"
#include "clang/Lex/LexerToken.h"
-#include "llvm/Support/Visibility.h"
+#include "clang/Lex/Preprocessor.h"
+#include "llvm/Support/Compiler.h"
using namespace llvm;
using namespace clang;
/// ASTBuilder
namespace {
class VISIBILITY_HIDDEN ASTBuilder : public Action {
+ Preprocessor &PP;
+
/// FullLocInfo - If this is true, the ASTBuilder constructs AST Nodes that
/// capture maximal location information for each source-language construct.
bool FullLocInfo;
public:
- ASTBuilder(bool fullLocInfo) : FullLocInfo(fullLocInfo) {}
+ ASTBuilder(Preprocessor &pp, bool fullLocInfo)
+ : PP(pp), FullLocInfo(fullLocInfo) {}
+
//===--------------------------------------------------------------------===//
// Symbol table tracking callbacks.
//
@@ -47,6 +52,9 @@
virtual ExprResult ParseFloatingConstant(const LexerToken &Tok);
virtual ExprResult ParseParenExpr(SourceLocation L, SourceLocation R,
ExprTy *Val);
+ virtual ExprResult ParseStringExpr(const char *StrData, unsigned StrLen,
+ bool isWide,
+ const LexerToken *Toks, unsigned NumToks);
// Binary/Unary Operators. 'Tok' is the token for the operator.
virtual ExprResult ParseUnaryOp(const LexerToken &Tok, ExprTy *Input);
@@ -166,6 +174,26 @@
return new ParenExpr(L, R, (Expr*)Val);
}
+/// ParseStringExpr - This accepts a string after semantic analysis. This string
+/// may be the result of string concatenation ([C99 5.1.1.2, translation phase
+/// #6]), so it may come from multiple tokens.
+///
+Action::ExprResult ASTBuilder::
+ParseStringExpr(const char *StrData, unsigned StrLen, bool isWide,
+ const LexerToken *Toks, unsigned NumToks) {
+ assert(NumToks && "Must have at least one string!");
+
+ if (!FullLocInfo)
+ return new StringExpr(StrData, StrLen, isWide);
+ else {
+ SmallVector<SourceLocation, 4> Locs;
+ for (unsigned i = 0; i != NumToks; ++i)
+ Locs.push_back(Toks[i].getLocation());
+ return new StringExprLOC(StrData, StrLen, isWide, &Locs[0], Locs.size());
+ }
+}
+
+
// Unary Operators. 'Tok' is the token for the operator.
Action::ExprResult ASTBuilder::ParseUnaryOp(const LexerToken &Tok,
ExprTy *Input) {
@@ -326,8 +354,8 @@
/// Interface to the Builder.cpp file.
///
-Action *CreateASTBuilderActions(bool FullLocInfo) {
- return new ASTBuilder(FullLocInfo);
+Action *CreateASTBuilderActions(Preprocessor &PP, bool FullLocInfo) {
+ return new ASTBuilder(PP, FullLocInfo);
}
Modified: cfe/cfe/trunk/clang.xcodeproj/project.pbxproj
URL: http://llvm.org/viewvc/llvm-project/cfe/cfe/trunk/clang.xcodeproj/project.pbxproj?rev=38960&r1=38959&r2=38960&view=diff
==============================================================================
--- cfe/cfe/trunk/clang.xcodeproj/project.pbxproj (original)
+++ cfe/cfe/trunk/clang.xcodeproj/project.pbxproj Wed Jul 11 11:26:15 2007
@@ -424,7 +424,7 @@
1DEB923208733DC60010E9CD /* Debug */ = {
isa = XCBuildConfiguration;
buildSettings = {
- ARCHS = ppc;
+ ARCHS = i386;
COPY_PHASE_STRIP = NO;
GCC_CW_ASM_SYNTAX = NO;
GCC_DYNAMIC_NO_PIC = NO;
@@ -460,7 +460,7 @@
1DEB923308733DC60010E9CD /* Release */ = {
isa = XCBuildConfiguration;
buildSettings = {
- ARCHS = ppc;
+ ARCHS = i386;
GCC_CW_ASM_SYNTAX = NO;
GCC_ENABLE_CPP_EXCEPTIONS = NO;
GCC_ENABLE_CPP_RTTI = NO;
Modified: cfe/cfe/trunk/include/clang/AST/Expr.h
URL: http://llvm.org/viewvc/llvm-project/cfe/cfe/trunk/include/clang/AST/Expr.h?rev=38960&r1=38959&r2=38960&view=diff
==============================================================================
--- cfe/cfe/trunk/include/clang/AST/Expr.h (original)
+++ cfe/cfe/trunk/include/clang/AST/Expr.h Wed Jul 11 11:26:15 2007
@@ -15,7 +15,7 @@
#define LLVM_CLANG_AST_EXPR_H
#include "clang/Basic/SourceLocation.h"
-#include <cassert>
+#include "llvm/ADT/SmallVector.h"
namespace llvm {
namespace clang {
@@ -64,6 +64,26 @@
virtual void dump_impl() const;
};
+class StringExpr : public Expr {
+ const char *StrData;
+ unsigned ByteLength;
+ bool isWide;
+public:
+ StringExpr(const char *strData, unsigned byteLength, bool Wide);
+ virtual ~StringExpr();
+ virtual void dump_impl() const;
+};
+
+class StringExprLOC : public StringExpr {
+ // Locations for the string tokens before string concatenation.
+ SmallVector<SourceLocation, 4> Locs;
+public:
+ StringExprLOC(const char *StrData, unsigned ByteLength, bool isWide,
+ SourceLocation *L, unsigned NumLocs)
+ : StringExpr(StrData, ByteLength, isWide), Locs(L, L+NumLocs) {
+ }
+};
+
/// ParenExpr - This represents a parenthesized expression, e.g. "(1)". This
/// AST node is only formed if full location information is requested.
class ParenExpr : public Expr {
Modified: cfe/cfe/trunk/include/clang/Basic/DiagnosticKinds.def
URL: http://llvm.org/viewvc/llvm-project/cfe/cfe/trunk/include/clang/Basic/DiagnosticKinds.def?rev=38960&r1=38959&r2=38960&view=diff
==============================================================================
--- cfe/cfe/trunk/include/clang/Basic/DiagnosticKinds.def (original)
+++ cfe/cfe/trunk/include/clang/Basic/DiagnosticKinds.def Wed Jul 11 11:26:15 2007
@@ -342,6 +342,18 @@
DIAG(err_matching, ERROR,
"to match this '%s'")
+//===----------------------------------------------------------------------===//
+// Semantic Analysis
+//===----------------------------------------------------------------------===//
+
+DIAG(ext_nonstandard_escape, EXTENSION,
+ "use of non-standard escape character '\\%s'")
+DIAG(ext_unknown_escape, EXTENSION,
+ "unknown escape sequence '\\%s'")
+
+DIAG(err_hex_escape_no_digits, ERROR,
+ "\\x used with no following hex digits")
+
DIAG(err_typename_requires_specqual, ERROR,
"type name requires a specifier or qualifier")
DIAG(err_typename_invalid_storageclass, ERROR,
Modified: cfe/cfe/trunk/include/clang/Basic/TokenKinds.def
URL: http://llvm.org/viewvc/llvm-project/cfe/cfe/trunk/include/clang/Basic/TokenKinds.def?rev=38960&r1=38959&r2=38960&view=diff
==============================================================================
--- cfe/cfe/trunk/include/clang/Basic/TokenKinds.def (original)
+++ cfe/cfe/trunk/include/clang/Basic/TokenKinds.def Wed Jul 11 11:26:15 2007
@@ -42,7 +42,8 @@
TOK(char_constant) // 'a' L'b'
// C99 6.4.5: String Literals.
-TOK(string_literal) // "foo" L"foo"
+TOK(string_literal) // "foo"
+TOK(wide_string_literal) // L"foo"
TOK(angle_string_literal)// <foo>
// C99 6.4.6: Punctuators.
Modified: cfe/cfe/trunk/include/clang/Lex/Lexer.h
URL: http://llvm.org/viewvc/llvm-project/cfe/cfe/trunk/include/clang/Lex/Lexer.h?rev=38960&r1=38959&r2=38960&view=diff
==============================================================================
--- cfe/cfe/trunk/include/clang/Lex/Lexer.h (original)
+++ cfe/cfe/trunk/include/clang/Lex/Lexer.h Wed Jul 11 11:26:15 2007
@@ -329,7 +329,7 @@
// Helper functions to lex the remainder of a token of the specific type.
void LexIdentifier (LexerToken &Result, const char *CurPtr);
void LexNumericConstant (LexerToken &Result, const char *CurPtr);
- void LexStringLiteral (LexerToken &Result, const char *CurPtr);
+ void LexStringLiteral (LexerToken &Result, const char *CurPtr,bool Wide);
void LexAngledStringLiteral(LexerToken &Result, const char *CurPtr);
void LexCharConstant (LexerToken &Result, const char *CurPtr);
bool LexEndOfFile (LexerToken &Result, const char *CurPtr);
Modified: cfe/cfe/trunk/include/clang/Parse/Action.h
URL: http://llvm.org/viewvc/llvm-project/cfe/cfe/trunk/include/clang/Parse/Action.h?rev=38960&r1=38959&r2=38960&view=diff
==============================================================================
--- cfe/cfe/trunk/include/clang/Parse/Action.h (original)
+++ cfe/cfe/trunk/include/clang/Parse/Action.h Wed Jul 11 11:26:15 2007
@@ -94,11 +94,19 @@
virtual ExprResult ParseSimplePrimaryExpr(const LexerToken &Tok) { return 0; }
virtual ExprResult ParseIntegerConstant(const LexerToken &Tok) { return 0; }
virtual ExprResult ParseFloatingConstant(const LexerToken &Tok) { return 0; }
-
virtual ExprResult ParseParenExpr(SourceLocation L, SourceLocation R,
ExprTy *Val) {
return Val; // Default impl returns operand.
}
+
+ /// ParseStringExpr - The (null terminated) string data is specified with
+ /// StrData+StrLen. isWide is true if this is a wide string. The Toks/NumToks
+ /// array exposes the input tokens to provide location information.
+ virtual ExprResult ParseStringExpr(const char *StrData, unsigned StrLen,
+ bool isWide,
+ const LexerToken *Toks, unsigned NumToks) {
+ return 0;
+ }
// Postfix Expressions.
virtual ExprResult ParsePostfixUnaryOp(const LexerToken &Tok, ExprTy *Input) {
Modified: cfe/cfe/trunk/include/clang/Parse/Parser.h
URL: http://llvm.org/viewvc/llvm-project/cfe/cfe/trunk/include/clang/Parse/Parser.h?rev=38960&r1=38959&r2=38960&view=diff
==============================================================================
--- cfe/cfe/trunk/include/clang/Parse/Parser.h (original)
+++ cfe/cfe/trunk/include/clang/Parse/Parser.h Wed Jul 11 11:26:15 2007
@@ -89,7 +89,8 @@
/// isTokenStringLiteral - True if this token is a string-literal.
///
bool isTokenStringLiteral() const {
- return Tok.getKind() == tok::string_literal;
+ return Tok.getKind() == tok::string_literal ||
+ Tok.getKind() == tok::wide_string_literal;
}
/// ConsumeToken - Consume the current 'peek token' and lex the next one.
More information about the cfe-commits
mailing list