[cfe-commits] r38783 - in /cfe/cfe/trunk: Driver/PrintPreprocessedOutput.cpp Driver/clang.cpp Driver/clang.h Lex/Lexer.cpp Lex/Preprocessor.cpp README.txt include/clang/Basic/TokenKinds.def include/clang/Lex/Lexer.h
sabre at cs.uiuc.edu
sabre at cs.uiuc.edu
Wed Jul 11 09:24:35 PDT 2007
Author: sabre
Date: Wed Jul 11 11:24:34 2007
New Revision: 38783
URL: http://llvm.org/viewvc/llvm-project?rev=38783&view=rev
Log:
Implement comment saving mode: the -C and -CC options.
Modified:
cfe/cfe/trunk/Driver/PrintPreprocessedOutput.cpp
cfe/cfe/trunk/Driver/clang.cpp
cfe/cfe/trunk/Driver/clang.h
cfe/cfe/trunk/Lex/Lexer.cpp
cfe/cfe/trunk/Lex/Preprocessor.cpp
cfe/cfe/trunk/README.txt
cfe/cfe/trunk/include/clang/Basic/TokenKinds.def
cfe/cfe/trunk/include/clang/Lex/Lexer.h
Modified: cfe/cfe/trunk/Driver/PrintPreprocessedOutput.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/cfe/trunk/Driver/PrintPreprocessedOutput.cpp?rev=38783&r1=38782&r2=38783&view=diff
==============================================================================
--- cfe/cfe/trunk/Driver/PrintPreprocessedOutput.cpp (original)
+++ cfe/cfe/trunk/Driver/PrintPreprocessedOutput.cpp Wed Jul 11 11:24:34 2007
@@ -99,6 +99,11 @@
static cl::opt<bool>
DisableLineMarkers("P", cl::desc("Disable linemarker output in -E mode"));
+static cl::opt<bool>
+EnableCommentOutput("C", cl::desc("Enable comment output in -E mode"));
+static cl::opt<bool>
+EnableMacroCommentOutput("CC", cl::desc("Enable comment output in -E mode, "
+ "even from macro expansions"));
static unsigned EModeCurLine;
static std::string EModeCurFilename;
@@ -357,7 +362,12 @@
/// DoPrintPreprocessedInput - This implements -E mode.
///
-void clang::DoPrintPreprocessedInput(Preprocessor &PP) {
+void clang::DoPrintPreprocessedInput(Preprocessor &PP, LangOptions &Options) {
+ if (EnableCommentOutput) // -C specified?
+ Options.KeepComments = 1;
+ if (EnableMacroCommentOutput) // -CC specified?
+ Options.KeepComments = Options.KeepMacroComments = 1;
+
InitOutputBuffer();
LexerToken Tok, PrevTok;
Modified: cfe/cfe/trunk/Driver/clang.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/cfe/trunk/Driver/clang.cpp?rev=38783&r1=38782&r2=38783&view=diff
==============================================================================
--- cfe/cfe/trunk/Driver/clang.cpp (original)
+++ cfe/cfe/trunk/Driver/clang.cpp Wed Jul 11 11:24:34 2007
@@ -703,7 +703,7 @@
}
case PrintPreprocessedInput: // -E mode.
- DoPrintPreprocessedInput(PP);
+ DoPrintPreprocessedInput(PP, Options);
break;
case DumpTokens: { // Token dump mode.
Modified: cfe/cfe/trunk/Driver/clang.h
URL: http://llvm.org/viewvc/llvm-project/cfe/cfe/trunk/Driver/clang.h?rev=38783&r1=38782&r2=38783&view=diff
==============================================================================
--- cfe/cfe/trunk/Driver/clang.h (original)
+++ cfe/cfe/trunk/Driver/clang.h Wed Jul 11 11:24:34 2007
@@ -16,10 +16,11 @@
namespace llvm {
namespace clang {
-class Preprocessor;
+class Preprocessor;
+class LangOptions;
/// DoPrintPreprocessedInput - Implement -E mode.
-void DoPrintPreprocessedInput(Preprocessor &PP);
+void DoPrintPreprocessedInput(Preprocessor &PP, LangOptions &Options);
} // end namespace clang
} // end namespace llvm
Modified: cfe/cfe/trunk/Lex/Lexer.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/cfe/trunk/Lex/Lexer.cpp?rev=38783&r1=38782&r2=38783&view=diff
==============================================================================
--- cfe/cfe/trunk/Lex/Lexer.cpp (original)
+++ cfe/cfe/trunk/Lex/Lexer.cpp Wed Jul 11 11:24:34 2007
@@ -65,6 +65,9 @@
// to quickly lex the tokens of the buffer, e.g. when handling a "#if 0" block
// or otherwise skipping over tokens.
LexingRawMode = false;
+
+ // Default to keeping comments if requested.
+ KeepCommentMode = Features.KeepComments;
}
/// Stringify - Convert the specified string into a C string, with surrounding
@@ -587,13 +590,15 @@
// If the next token is obviously a // or /* */ comment, skip it efficiently
// too (without going through the big switch stmt).
- if (Char == '/' && CurPtr[1] == '/') {
+ if (Char == '/' && CurPtr[1] == '/' && !KeepCommentMode) {
BufferPtr = CurPtr;
- return SkipBCPLComment(Result, CurPtr+1);
+ SkipBCPLComment(Result, CurPtr+1);
+ return;
}
- if (Char == '/' && CurPtr[1] == '*') {
+ if (Char == '/' && CurPtr[1] == '*' && !KeepCommentMode) {
BufferPtr = CurPtr;
- return SkipBlockComment(Result, CurPtr+2);
+ SkipBlockComment(Result, CurPtr+2);
+ return;
}
BufferPtr = CurPtr;
}
@@ -601,7 +606,7 @@
// SkipBCPLComment - We have just read the // characters from input. Skip until
// we find the newline character that terminates the comment. Then update
/// BufferPtr and return.
-void Lexer::SkipBCPLComment(LexerToken &Result, const char *CurPtr) {
+bool Lexer::SkipBCPLComment(LexerToken &Result, const char *CurPtr) {
// If BCPL comments aren't explicitly enabled for this language, emit an
// extension warning.
if (!Features.BCPLComment) {
@@ -648,16 +653,20 @@
}
}
- if (CurPtr == BufferEnd+1) goto FoundEOF;
+ if (CurPtr == BufferEnd+1) { --CurPtr; break; }
} while (C != '\n' && C != '\r');
- // Found and did not consume a newline.
+ // Found but did not consume the newline.
+
+ // If we are returning comments as tokens, return this comment as a token.
+ if (KeepCommentMode)
+ return SaveBCPLComment(Result, CurPtr);
// If we are inside a preprocessor directive and we see the end of line,
// return immediately, so that the lexer can return this as an EOM token.
- if (ParsingPreprocessorDirective) {
+ if (ParsingPreprocessorDirective || CurPtr == BufferEnd) {
BufferPtr = CurPtr;
- return;
+ return true;
}
// Otherwise, eat the \n character. We don't care if this is a \n\r or
@@ -674,15 +683,33 @@
// big switch, handle it efficiently now.
if (isWhitespace(*CurPtr)) {
Result.SetFlag(LexerToken::LeadingSpace);
- return SkipWhitespace(Result, CurPtr+1);
+ SkipWhitespace(Result, CurPtr+1);
+ return true;
}
BufferPtr = CurPtr;
- return;
+ return true;
+}
-FoundEOF: // If we ran off the end of the buffer, return EOF.
- BufferPtr = CurPtr-1;
- return;
+/// SaveBCPLComment - If in save-comment mode, package up this BCPL comment in
+/// an appropriate way and return it.
+bool Lexer::SaveBCPLComment(LexerToken &Result, const char *CurPtr) {
+ Result.SetKind(tok::comment);
+ FormTokenWithChars(Result, CurPtr);
+
+ // If this BCPL-style comment is in a macro definition, transmogrify it into
+ // a C-style block comment.
+ if (ParsingPreprocessorDirective) {
+ std::string Spelling = PP.getSpelling(Result);
+ assert(Spelling[0] == '/' && Spelling[1] == '/' && "Not bcpl comment?");
+ Spelling[1] = '*'; // Change prefix to "/*".
+ Spelling += "*/"; // add suffix.
+
+ Result.SetLocation(PP.CreateString(&Spelling[0], Spelling.size(),
+ Result.getLocation()));
+ Result.SetLength(Spelling.size());
+ }
+ return false;
}
/// isBlockCommentEndOfEscapedNewLine - Return true if the specified newline
@@ -748,7 +775,7 @@
/// because they cannot cause the comment to end. The only thing that can
/// happen is the comment could end with an escaped newline between the */ end
/// of comment.
-void Lexer::SkipBlockComment(LexerToken &Result, const char *CurPtr) {
+bool Lexer::SkipBlockComment(LexerToken &Result, const char *CurPtr) {
// Scan one character past where we should, looking for a '/' character. Once
// we find it, check to see if it was preceded by a *. This common
// optimization helps people who like to put a lot of * characters in their
@@ -757,7 +784,7 @@
if (C == 0 && CurPtr == BufferEnd+1) {
Diag(BufferPtr, diag::err_unterminated_block_comment);
BufferPtr = CurPtr-1;
- return;
+ return true;
}
while (1) {
@@ -789,22 +816,31 @@
// after the /*, but this would involve lexing a lot of what really is the
// comment, which surely would confuse the parser.
BufferPtr = CurPtr-1;
- return;
+ return true;
}
C = *CurPtr++;
}
+
+ // If we are returning comments as tokens, return this comment as a token.
+ if (KeepCommentMode) {
+ Result.SetKind(tok::comment);
+ FormTokenWithChars(Result, CurPtr);
+ return false;
+ }
// It is common for the tokens immediately after a /**/ comment to be
// whitespace. Instead of going through the big switch, handle it
// efficiently now.
if (isHorizontalWhitespace(*CurPtr)) {
Result.SetFlag(LexerToken::LeadingSpace);
- return SkipWhitespace(Result, CurPtr+1);
+ SkipWhitespace(Result, CurPtr+1);
+ return true;
}
// Otherwise, just return so that the next character will be lexed as a token.
BufferPtr = CurPtr;
Result.SetFlag(LexerToken::LeadingSpace);
+ return true;
}
//===----------------------------------------------------------------------===//
@@ -920,6 +956,9 @@
Result.SetKind(tok::eom);
// Update the location of token as well as BufferPtr.
FormTokenWithChars(Result, CurPtr);
+
+ // Restore comment saving mode, in case it was disabled for directive.
+ KeepCommentMode = Features.KeepComments;
return true; // Have a token.
}
@@ -1035,6 +1074,9 @@
// Done parsing the "line".
ParsingPreprocessorDirective = false;
+ // Restore comment saving mode, in case it was disabled for directive.
+ KeepCommentMode = Features.KeepComments;
+
// Since we consumed a newline, we are back at the start of a line.
IsAtStartOfLine = true;
@@ -1211,13 +1253,13 @@
// 6.4.9: Comments
Char = getCharAndSize(CurPtr, SizeTmp);
if (Char == '/') { // BCPL comment.
- Result.SetFlag(LexerToken::LeadingSpace);
- SkipBCPLComment(Result, ConsumeChar(CurPtr, SizeTmp, Result));
- goto LexNextToken; // GCC isn't tail call eliminating.
+ if (SkipBCPLComment(Result, ConsumeChar(CurPtr, SizeTmp, Result)))
+ goto LexNextToken; // GCC isn't tail call eliminating.
+ return; // KeepCommentMode
} else if (Char == '*') { // /**/ comment.
- Result.SetFlag(LexerToken::LeadingSpace);
- SkipBlockComment(Result, ConsumeChar(CurPtr, SizeTmp, Result));
- goto LexNextToken; // GCC isn't tail call eliminating.
+ if (SkipBlockComment(Result, ConsumeChar(CurPtr, SizeTmp, Result)))
+ goto LexNextToken; // GCC isn't tail call eliminating.
+ return; // KeepCommentMode
} else if (Char == '=') {
Result.SetKind(tok::slashequal);
CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
Modified: cfe/cfe/trunk/Lex/Preprocessor.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/cfe/trunk/Lex/Preprocessor.cpp?rev=38783&r1=38782&r2=38783&view=diff
==============================================================================
--- cfe/cfe/trunk/Lex/Preprocessor.cpp (original)
+++ cfe/cfe/trunk/Lex/Preprocessor.cpp Wed Jul 11 11:24:34 2007
@@ -755,6 +755,10 @@
return 0;
}
// Otherwise, continue to add the tokens to this variable argument.
+ } else if (Tok.getKind() == tok::comment && !Features.KeepMacroComments) {
+ // If this is a comment token in the argument list and we're just in
+ // -C mode (not -CC mode), discard the comment.
+ continue;
}
ArgTokens.push_back(Tok);
@@ -1221,6 +1225,8 @@
// directive mode. Tell the lexer this so any newlines we see will be
// converted into an EOM token (this terminates the macro).
CurLexer->ParsingPreprocessorDirective = true;
+ CurLexer->KeepCommentMode = false;
+
// Read the next token, the directive flavor.
LexUnexpandedToken(Tok);
@@ -1229,6 +1235,8 @@
// something bogus), skip it.
if (Tok.getKind() != tok::identifier) {
CurLexer->ParsingPreprocessorDirective = false;
+ // Restore comment saving mode.
+ CurLexer->KeepCommentMode = Features.KeepComments;
continue;
}
@@ -1242,6 +1250,8 @@
if (FirstChar >= 'a' && FirstChar <= 'z' &&
FirstChar != 'i' && FirstChar != 'e') {
CurLexer->ParsingPreprocessorDirective = false;
+ // Restore comment saving mode.
+ CurLexer->KeepCommentMode = Features.KeepComments;
continue;
}
@@ -1261,6 +1271,8 @@
IdLen = DirectiveStr.size();
if (IdLen >= 20) {
CurLexer->ParsingPreprocessorDirective = false;
+ // Restore comment saving mode.
+ CurLexer->KeepCommentMode = Features.KeepComments;
continue;
}
memcpy(Directive, &DirectiveStr[0], IdLen);
@@ -1339,6 +1351,8 @@
}
CurLexer->ParsingPreprocessorDirective = false;
+ // Restore comment saving mode.
+ CurLexer->KeepCommentMode = Features.KeepComments;
}
// Finally, if we are out of the conditional (saw an #endif or ran off the end
@@ -1698,6 +1712,10 @@
if (MacroNameTok.getKind() == tok::eom)
return;
+ // If we are supposed to keep comments in #defines, reenable comment saving
+ // mode.
+ CurLexer->KeepCommentMode = Features.KeepMacroComments;
+
MacroInfo *MI = new MacroInfo(MacroNameTok.getLocation());
LexerToken Tok;
Modified: cfe/cfe/trunk/README.txt
URL: http://llvm.org/viewvc/llvm-project/cfe/cfe/trunk/README.txt?rev=38783&r1=38782&r2=38783&view=diff
==============================================================================
--- cfe/cfe/trunk/README.txt (original)
+++ cfe/cfe/trunk/README.txt Wed Jul 11 11:24:34 2007
@@ -67,7 +67,6 @@
Preprocessor:
* #assert/#unassert
* #line / #file directives
- * -C output mode in -E mode.
* MSExtension: "L#param" stringizes to a wide string literal.
Traditional Preprocessor:
Modified: cfe/cfe/trunk/include/clang/Basic/TokenKinds.def
URL: http://llvm.org/viewvc/llvm-project/cfe/cfe/trunk/include/clang/Basic/TokenKinds.def?rev=38783&r1=38782&r2=38783&view=diff
==============================================================================
--- cfe/cfe/trunk/include/clang/Basic/TokenKinds.def (original)
+++ cfe/cfe/trunk/include/clang/Basic/TokenKinds.def Wed Jul 11 11:24:34 2007
@@ -28,6 +28,9 @@
TOK(eof) // End of file.
TOK(eom) // End of macro (end of line inside a macro).
+// C99 6.4.9: Comments.
+TOK(comment) // Comment (only in -E -C[C] mode)
+
// C99 6.4.2: Identifiers.
TOK(identifier) // abcde123
Modified: cfe/cfe/trunk/include/clang/Lex/Lexer.h
URL: http://llvm.org/viewvc/llvm-project/cfe/cfe/trunk/include/clang/Lex/Lexer.h?rev=38783&r1=38782&r2=38783&view=diff
==============================================================================
--- cfe/cfe/trunk/include/clang/Lex/Lexer.h (original)
+++ cfe/cfe/trunk/include/clang/Lex/Lexer.h Wed Jul 11 11:24:34 2007
@@ -26,23 +26,27 @@
class SourceBuffer;
struct LangOptions {
- unsigned Trigraphs : 1; // Trigraphs in source files.
- unsigned BCPLComment : 1; // BCPL-style // comments.
- unsigned DollarIdents : 1; // '$' allowed in identifiers.
- unsigned Digraphs : 1; // When added to C? C99?
- unsigned HexFloats : 1; // C99 Hexadecimal float constants.
- unsigned C99 : 1; // C99 Support
- unsigned Microsoft : 1; // Microsoft extensions.
- unsigned CPlusPlus : 1; // C++ Support
- unsigned CPPMinMax : 1; // C++ <?=, >?= tokens.
- unsigned NoExtensions : 1; // All extensions are disabled, strict mode.
+ unsigned Trigraphs : 1; // Trigraphs in source files.
+ unsigned BCPLComment : 1; // BCPL-style // comments.
+ unsigned DollarIdents : 1; // '$' allowed in identifiers.
+ unsigned Digraphs : 1; // When added to C? C99?
+ unsigned HexFloats : 1; // C99 Hexadecimal float constants.
+ unsigned C99 : 1; // C99 Support
+ unsigned Microsoft : 1; // Microsoft extensions.
+ unsigned CPlusPlus : 1; // C++ Support
+ unsigned CPPMinMax : 1; // C++ <?=, >?= tokens.
+ unsigned NoExtensions : 1; // All extensions are disabled, strict mode.
- unsigned ObjC1 : 1; // Objective C 1 support enabled.
- unsigned ObjC2 : 1; // Objective C 2 support enabled (implies ObjC1).
+ unsigned ObjC1 : 1; // Objective C 1 support enabled.
+ unsigned ObjC2 : 1; // Objective C 2 support enabled.
+
+ unsigned KeepComments : 1; // Keep comments ("-C") mode.
+ unsigned KeepMacroComments : 1; // Keep macro-exp comments ("-CC") mode.
LangOptions() {
Trigraphs = BCPLComment = DollarIdents = Digraphs = ObjC1 = ObjC2 = 0;
C99 = Microsoft = CPlusPlus = CPPMinMax = NoExtensions = 0;
+ KeepComments = KeepMacroComments = 0;
}
};
@@ -87,6 +91,10 @@
/// on an unterminated '/*' comment.
bool LexingRawMode;
+ /// KeepCommentMode - The lexer can optionally keep C & BCPL-style comments,
+ /// and return them as tokens. This is used for -C and -CC modes.
+ bool KeepCommentMode;
+
//===--------------------------------------------------------------------===//
// Context that changes as the file is lexed.
// NOTE: any state that mutates when in raw mode must have save/restore code
@@ -353,9 +361,9 @@
bool LexEndOfFile (LexerToken &Result, const char *CurPtr);
void SkipWhitespace (LexerToken &Result, const char *CurPtr);
- void SkipBCPLComment (LexerToken &Result, const char *CurPtr);
- void SkipBlockComment (LexerToken &Result, const char *CurPtr);
-
+ bool SkipBCPLComment (LexerToken &Result, const char *CurPtr);
+ bool SkipBlockComment (LexerToken &Result, const char *CurPtr);
+ bool SaveBCPLComment (LexerToken &Result, const char *CurPtr);
/// LexIncludeFilename - After the preprocessor has parsed a #include, lex and
/// (potentially) macro expand the filename. If the sequence parsed is not
More information about the cfe-commits
mailing list