r175778 - Preprocessor: preserve whitespace in -traditional-cpp mode.
Jordan Rose
jordan_rose at apple.com
Thu Feb 21 10:53:19 PST 2013
Author: jrose
Date: Thu Feb 21 12:53:19 2013
New Revision: 175778
URL: http://llvm.org/viewvc/llvm-project?rev=175778&view=rev
Log:
Preprocessor: preserve whitespace in -traditional-cpp mode.
Note that unlike GNU cpp we currently do not preserve whitespace in macros
(even in -traditional-cpp mode).
<rdar://problem/12897179>
Modified:
cfe/trunk/include/clang/Lex/Lexer.h
cfe/trunk/lib/Frontend/PrintPreprocessedOutput.cpp
cfe/trunk/lib/Lex/Lexer.cpp
cfe/trunk/lib/Lex/PPDirectives.cpp
cfe/trunk/test/Preprocessor/traditional-cpp.c
Modified: cfe/trunk/include/clang/Lex/Lexer.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Lex/Lexer.h?rev=175778&r1=175777&r2=175778&view=diff
==============================================================================
--- cfe/trunk/include/clang/Lex/Lexer.h (original)
+++ cfe/trunk/include/clang/Lex/Lexer.h Thu Feb 21 12:53:19 2013
@@ -174,8 +174,8 @@ public:
/// SetKeepWhitespaceMode - This method lets clients enable or disable
/// whitespace retention mode.
void SetKeepWhitespaceMode(bool Val) {
- assert((!Val || LexingRawMode) &&
- "Can only enable whitespace retention in raw mode");
+ assert((!Val || LexingRawMode || LangOpts.TraditionalCPP) &&
+ "Can only retain whitespace in raw mode or -traditional-cpp");
ExtendedTokenMode = Val ? 2 : 0;
}
@@ -194,6 +194,14 @@ public:
ExtendedTokenMode = Mode ? 1 : 0;
}
+ /// Sets the extended token mode back to its initial value, according to the
+ /// language options and preprocessor. This controls whether the lexer
+ /// produces comment and whitespace tokens.
+ ///
+ /// This requires the lexer to have an associated preprocessor. A standalone
+ /// lexer has nothing to reset to.
+ void resetExtendedTokenMode();
+
const char *getBufferStart() const { return BufferStart; }
/// ReadToEndOfLine - Read the rest of the current preprocessor line as an
Modified: cfe/trunk/lib/Frontend/PrintPreprocessedOutput.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Frontend/PrintPreprocessedOutput.cpp?rev=175778&r1=175777&r2=175778&view=diff
==============================================================================
--- cfe/trunk/lib/Frontend/PrintPreprocessedOutput.cpp (original)
+++ cfe/trunk/lib/Frontend/PrintPreprocessedOutput.cpp Thu Feb 21 12:53:19 2013
@@ -548,7 +548,7 @@ static void PrintPreprocessedTokens(Prep
// Tokens that can contain embedded newlines need to adjust our current
// line number.
- if (Tok.getKind() == tok::comment)
+ if (Tok.getKind() == tok::comment || Tok.getKind() == tok::unknown)
Callbacks->HandleNewlinesInToken(TokPtr, Len);
} else {
std::string S = PP.getSpelling(Tok);
@@ -556,7 +556,7 @@ static void PrintPreprocessedTokens(Prep
// Tokens that can contain embedded newlines need to adjust our current
// line number.
- if (Tok.getKind() == tok::comment)
+ if (Tok.getKind() == tok::comment || Tok.getKind() == tok::unknown)
Callbacks->HandleNewlinesInToken(&S[0], S.size());
}
Callbacks->setEmittedTokensOnThisLine();
Modified: cfe/trunk/lib/Lex/Lexer.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Lex/Lexer.cpp?rev=175778&r1=175777&r2=175778&view=diff
==============================================================================
--- cfe/trunk/lib/Lex/Lexer.cpp (original)
+++ cfe/trunk/lib/Lex/Lexer.cpp Thu Feb 21 12:53:19 2013
@@ -122,8 +122,15 @@ Lexer::Lexer(FileID FID, const llvm::Mem
InitLexer(InputFile->getBufferStart(), InputFile->getBufferStart(),
InputFile->getBufferEnd());
- // Default to keeping comments if the preprocessor wants them.
- SetCommentRetentionState(PP.getCommentRetentionState());
+ resetExtendedTokenMode();
+}
+
+void Lexer::resetExtendedTokenMode() {
+ assert(PP && "Cannot reset token mode without a preprocessor");
+ if (LangOpts.TraditionalCPP)
+ SetKeepWhitespaceMode(true);
+ else
+ SetCommentRetentionState(PP->getCommentRetentionState());
}
/// Lexer constructor - Create a new raw lexer object. This object is only
@@ -1844,6 +1851,8 @@ void Lexer::LexCharConstant(Token &Resul
///
bool Lexer::SkipWhitespace(Token &Result, const char *CurPtr) {
// Whitespace - Skip it, then return the token after the whitespace.
+ bool SawNewline = isVerticalWhitespace(CurPtr[-1]);
+
unsigned char Char = *CurPtr; // Skip consequtive spaces efficiently.
while (1) {
// Skip horizontal whitespace very aggressively.
@@ -1851,7 +1860,7 @@ bool Lexer::SkipWhitespace(Token &Result
Char = *++CurPtr;
// Otherwise if we have something other than whitespace, we're done.
- if (Char != '\n' && Char != '\r')
+ if (!isVerticalWhitespace(Char))
break;
if (ParsingPreprocessorDirective) {
@@ -1861,24 +1870,27 @@ bool Lexer::SkipWhitespace(Token &Result
}
// ok, but handle newline.
- // The returned token is at the start of the line.
- Result.setFlag(Token::StartOfLine);
- // No leading whitespace seen so far.
- Result.clearFlag(Token::LeadingSpace);
+ SawNewline = true;
Char = *++CurPtr;
}
- // If this isn't immediately after a newline, there is leading space.
- char PrevChar = CurPtr[-1];
- if (PrevChar != '\n' && PrevChar != '\r')
- Result.setFlag(Token::LeadingSpace);
-
// If the client wants us to return whitespace, return it now.
if (isKeepWhitespaceMode()) {
FormTokenWithChars(Result, CurPtr, tok::unknown);
+ if (SawNewline)
+ IsAtStartOfLine = true;
+ // FIXME: The next token will not have LeadingSpace set.
return true;
}
+ // If this isn't immediately after a newline, there is leading space.
+ char PrevChar = CurPtr[-1];
+ bool HasLeadingSpace = !isVerticalWhitespace(PrevChar);
+
+ Result.setFlagValue(Token::LeadingSpace, HasLeadingSpace);
+ if (SawNewline)
+ Result.setFlag(Token::StartOfLine);
+
BufferPtr = CurPtr;
return false;
}
@@ -2269,7 +2281,6 @@ bool Lexer::SkipBlockComment(Token &Resu
// efficiently now. This is safe even in KeepWhitespaceMode because we would
// have already returned above with the comment as a token.
if (isHorizontalWhitespace(*CurPtr)) {
- Result.setFlag(Token::LeadingSpace);
SkipWhitespace(Result, CurPtr+1);
return false;
}
@@ -2351,7 +2362,7 @@ bool Lexer::LexEndOfFile(Token &Result,
FormTokenWithChars(Result, CurPtr, tok::eod);
// Restore comment saving mode, in case it was disabled for directive.
- SetCommentRetentionState(PP->getCommentRetentionState());
+ resetExtendedTokenMode();
return true; // Have a token.
}
@@ -2718,6 +2729,7 @@ LexNextToken:
// whitespace.
if (isKeepWhitespaceMode()) {
FormTokenWithChars(Result, CurPtr, tok::unknown);
+ // FIXME: The next token will not have LeadingSpace set.
return;
}
@@ -2785,7 +2797,7 @@ LexNextToken:
// Restore comment saving mode, in case it was disabled for directive.
if (PP)
- SetCommentRetentionState(PP->getCommentRetentionState());
+ resetExtendedTokenMode();
// Since we consumed a newline, we are back at the start of a line.
IsAtStartOfLine = true;
@@ -2793,8 +2805,7 @@ LexNextToken:
Kind = tok::eod;
break;
}
- // The returned token is at the start of the line.
- Result.setFlag(Token::StartOfLine);
+
// No leading whitespace seen so far.
Result.clearFlag(Token::LeadingSpace);
Modified: cfe/trunk/lib/Lex/PPDirectives.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Lex/PPDirectives.cpp?rev=175778&r1=175777&r2=175778&view=diff
==============================================================================
--- cfe/trunk/lib/Lex/PPDirectives.cpp (original)
+++ cfe/trunk/lib/Lex/PPDirectives.cpp Thu Feb 21 12:53:19 2013
@@ -269,7 +269,7 @@ void Preprocessor::SkipExcludedCondition
if (Tok.isNot(tok::raw_identifier)) {
CurPPLexer->ParsingPreprocessorDirective = false;
// Restore comment saving mode.
- if (CurLexer) CurLexer->SetCommentRetentionState(KeepComments);
+ if (CurLexer) CurLexer->resetExtendedTokenMode();
continue;
}
@@ -285,7 +285,7 @@ void Preprocessor::SkipExcludedCondition
FirstChar != 'i' && FirstChar != 'e') {
CurPPLexer->ParsingPreprocessorDirective = false;
// Restore comment saving mode.
- if (CurLexer) CurLexer->SetCommentRetentionState(KeepComments);
+ if (CurLexer) CurLexer->resetExtendedTokenMode();
continue;
}
@@ -302,7 +302,7 @@ void Preprocessor::SkipExcludedCondition
if (IdLen >= 20) {
CurPPLexer->ParsingPreprocessorDirective = false;
// Restore comment saving mode.
- if (CurLexer) CurLexer->SetCommentRetentionState(KeepComments);
+ if (CurLexer) CurLexer->resetExtendedTokenMode();
continue;
}
memcpy(DirectiveBuf, &DirectiveStr[0], IdLen);
@@ -408,7 +408,7 @@ void Preprocessor::SkipExcludedCondition
CurPPLexer->ParsingPreprocessorDirective = false;
// Restore comment saving mode.
- if (CurLexer) CurLexer->SetCommentRetentionState(KeepComments);
+ if (CurLexer) CurLexer->resetExtendedTokenMode();
}
// Finally, if we are out of the conditional (saw an #endif or ran off the end
@@ -594,6 +594,7 @@ void Preprocessor::HandleDirective(Token
// mode. Tell the lexer this so any newlines we see will be converted into an
// EOD token (which terminates the directive).
CurPPLexer->ParsingPreprocessorDirective = true;
+ if (CurLexer) CurLexer->SetKeepWhitespaceMode(false);
++NumDirectives;
@@ -638,14 +639,9 @@ void Preprocessor::HandleDirective(Token
// and reset to previous state when returning from this function.
ResetMacroExpansionHelper helper(this);
-TryAgain:
switch (Result.getKind()) {
case tok::eod:
return; // null directive.
- case tok::comment:
- // Handle stuff like "# /*foo*/ define X" in -E -C mode.
- LexUnexpandedToken(Result);
- goto TryAgain;
case tok::code_completion:
if (CodeComplete)
CodeComplete->CodeCompleteDirective(
Modified: cfe/trunk/test/Preprocessor/traditional-cpp.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Preprocessor/traditional-cpp.c?rev=175778&r1=175777&r2=175778&view=diff
==============================================================================
--- cfe/trunk/test/Preprocessor/traditional-cpp.c (original)
+++ cfe/trunk/test/Preprocessor/traditional-cpp.c Thu Feb 21 12:53:19 2013
@@ -4,9 +4,61 @@
/*
RUN: %clang_cc1 -traditional-cpp %s -E -o %t
- RUN: FileCheck < %t %s
+ RUN: FileCheck -strict-whitespace < %t %s
*/
-/* CHECK: foo // bar
+/* CHECK: {{^}}foo // bar{{$}}
*/
foo // bar
+
+
+/* The lines in this file contain hard tab characters and trailing whitespace;
+ * do not change them! */
+
+/* CHECK: {{^}} indented!{{$}}
+ * CHECK: {{^}}tab separated values{{$}}
+ */
+ indented!
+tab separated values
+
+#define bracket(x) >>>x<<<
+bracket(| spaces |)
+/* CHECK: {{^}}>>>| spaces |<<<{{$}}
+ */
+
+/* This is still a preprocessing directive. */
+# define foo bar
+foo!
+-
+ foo! foo!
+/* CHECK: {{^}}bar!{{$}}
+ * CHECK: {{^}} bar! bar! {{$}}
+ */
+
+/* Deliberately check a leading newline with spaces on that line. */
+
+# define foo bar
+foo!
+-
+ foo! foo!
+/* CHECK: {{^}}bar!{{$}}
+ * CHECK: {{^}} bar! bar! {{$}}
+ */
+
+/* FIXME: -traditional-cpp should not consider this a preprocessing directive
+ * because the # isn't in the first column.
+ */
+ #define foo2 bar
+foo2!
+/* If this were working, both of these checks would be on.
+ * CHECK-NOT: {{^}} #define foo2 bar{{$}}
+ * CHECK-NOT: {{^}}foo2!{{$}}
+ */
+
+/* FIXME: -traditional-cpp should not homogenize whitespace in macros.
+ */
+#define bracket2(x) >>> x <<<
+bracket2(spaces)
+/* If this were working, this check would be on.
+ * CHECK-NOT: {{^}}>>> spaces <<<{{$}}
+ */
More information about the cfe-commits
mailing list