[PATCH] Make Preprocessor::Lex non-recursive

Eli Friedman eli.friedman at gmail.com
Mon Sep 2 17:26:33 PDT 2013


Per the subject, the attached patch makes Preprocessor::Lex non-recursive.

Before this patch, Lex() would recurse whenever the current lexer changed
(e.g. upon entry into a macro).  This patch turns the recursion into a
loop: the various lex routines now return false instead of a token when
the current lexer changes, and the top-level Preprocessor::Lex() loops
until one of them produces a token.  Normally the recursion doesn't get
very deep, but the depth can explode in edge cases like a long run of
consecutive macros that expand to nothing (see the new testcase
test/Preprocessor/macro_expand_empty.c in this patch).
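
For reference, the top level now looks roughly like this -- a condensed
sketch of the Preprocessor.h hunk below, with the PTH, caching and
module-import cases elided:

  void Preprocessor::Lex(Token &Result) {
    bool ReturnedToken;
    do {
      switch (CurLexerKind) {
      case CLK_Lexer:
        // Sub-lexers now return false (instead of recursing) when the
        // current lexer changes, e.g. on entering or leaving a macro.
        ReturnedToken = CurLexer->Lex(Result);
        break;
      case CLK_TokenLexer:
        ReturnedToken = CurTokenLexer->Lex(Result);
        break;
      // ... remaining lexer kinds elided; see the full hunk below ...
      }
    } while (!ReturnedToken);
  }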

To make this work, I made some substantial changes to the way the
whitespace and empty-macro flags are propagated.  That code had to be
rewritten alongside this patch because the previous handling wasn't
tail-recursive: it did its work after the recursive Lex() call returned,
so it couldn't simply be turned into a loop.  The new code is pretty
straightforward: it adds a couple of flags to the Lexer class and updates
them whenever we leave a macro expansion.  I also fixed a minor bug while
I was in the area: the previous code didn't work correctly for macros
whose definitions contain tokens that themselves expand to nothing.
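
In sketch form, the flag plumbing ends up looking like this (condensed
from the Lexer.cpp hunks below):

  bool Lexer::Lex(Token &Result) {
    Result.startToken();

    // Hand any pending start-of-line/whitespace/empty-macro state over to
    // the new token, then clear it.
    if (IsAtStartOfLine) {
      Result.setFlag(Token::StartOfLine);
      IsAtStartOfLine = false;
    }
    if (HasLeadingSpace) {
      Result.setFlag(Token::LeadingSpace);
      HasLeadingSpace = false;
    }
    if (HasLeadingEmptyMacro) {
      Result.setFlag(Token::LeadingEmptyMacro);
      HasLeadingEmptyMacro = false;
    }

    bool atPhysicalStartOfLine = IsAtPhysicalStartOfLine;
    IsAtPhysicalStartOfLine = false;
    return LexTokenInternal(Result, atPhysicalStartOfLine);
  }

  // When a macro expansion (or an #included file) is popped, whatever
  // flags the half-lexed token has accumulated are written back into the
  // lexer that becomes current again:
  void Lexer::PropagateLineStartLeadingSpaceInfo(Token &Result) {
    IsAtStartOfLine = Result.isAtStartOfLine();
    HasLeadingSpace = Result.hasLeadingSpace();
    HasLeadingEmptyMacro = Result.hasLeadingEmptyMacro();
    // Note that this doesn't affect IsAtPhysicalStartOfLine.
  }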

I still need to do performance measurements, but I'm not anticipating any
measurable changes.

-Eli
-------------- next part --------------
diff --git a/include/clang/Lex/Lexer.h b/include/clang/Lex/Lexer.h
index 9b386e1..f456fa9 100644
--- a/include/clang/Lex/Lexer.h
+++ b/include/clang/Lex/Lexer.h
@@ -80,6 +80,12 @@ class Lexer : public PreprocessorLexer {
   // line" flag set on it.
   bool IsAtStartOfLine;
 
+  bool IsAtPhysicalStartOfLine;
+
+  bool HasLeadingSpace;
+
+  bool HasLeadingEmptyMacro;
+
   // CurrentConflictMarkerState - The kind of conflict marker we are handling.
   ConflictMarkerKind CurrentConflictMarkerState;
 
@@ -127,31 +133,21 @@ public:
   /// from.  Currently this is only used by _Pragma handling.
   SourceLocation getFileLoc() const { return FileLoc; }
 
+private:
   /// Lex - Return the next token in the file.  If this is the end of file, it
   /// return the tok::eof token.  This implicitly involves the preprocessor.
-  void Lex(Token &Result) {
-    // Start a new token.
-    Result.startToken();
-
-    // NOTE, any changes here should also change code after calls to
-    // Preprocessor::HandleDirective
-    if (IsAtStartOfLine) {
-      Result.setFlag(Token::StartOfLine);
-      IsAtStartOfLine = false;
-    }
-
-    // Get a token.  Note that this may delete the current lexer if the end of
-    // file is reached.
-    LexTokenInternal(Result);
-  }
+  bool Lex(Token &Result);
 
+public:
   /// isPragmaLexer - Returns true if this Lexer is being used to lex a pragma.
   bool isPragmaLexer() const { return Is_PragmaLexer; }
 
+private:
   /// IndirectLex - An indirect call to 'Lex' that can be invoked via
   ///  the PreprocessorLexer interface.
   void IndirectLex(Token &Result) { Lex(Result); }
 
+public:
   /// LexFromRawLexer - Lex a token from a designated raw lexer (one with no
   /// associated preprocessor object.  Return true if the 'next character to
   /// read' pointer points at the end of the lexer buffer, false otherwise.
@@ -447,12 +443,14 @@ private:
   /// LexTokenInternal - Internal interface to lex a preprocessing token. Called
   /// by Lex.
   ///
-  void LexTokenInternal(Token &Result);
+  bool LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine);
+
+  bool CheckUnicodeWhitespace(Token &Result, uint32_t C, const char *CurPtr);
 
   /// Given that a token begins with the Unicode character \p C, figure out
   /// what kind of token it is and dispatch to the appropriate lexing helper
   /// function.
-  void LexUnicode(Token &Result, uint32_t C, const char *CurPtr);
+  bool LexUnicode(Token &Result, uint32_t C, const char *CurPtr);
 
   /// FormTokenWithChars - When we lex a token, we have identified a span
   /// starting at BufferPtr, going to TokEnd that forms the token.  This method
@@ -570,24 +568,28 @@ private:
 
   void SkipBytes(unsigned Bytes, bool StartOfLine);
 
+  void PropagateLineStartLeadingSpaceInfo(Token &Result);
+
   const char *LexUDSuffix(Token &Result, const char *CurPtr,
                           bool IsStringLiteral);
 
   // Helper functions to lex the remainder of a token of the specific type.
-  void LexIdentifier         (Token &Result, const char *CurPtr);
-  void LexNumericConstant    (Token &Result, const char *CurPtr);
-  void LexStringLiteral      (Token &Result, const char *CurPtr,
+  bool LexIdentifier         (Token &Result, const char *CurPtr);
+  bool LexNumericConstant    (Token &Result, const char *CurPtr);
+  bool LexStringLiteral      (Token &Result, const char *CurPtr,
                               tok::TokenKind Kind);
-  void LexRawStringLiteral   (Token &Result, const char *CurPtr,
+  bool LexRawStringLiteral   (Token &Result, const char *CurPtr,
                               tok::TokenKind Kind);
-  void LexAngledStringLiteral(Token &Result, const char *CurPtr);
-  void LexCharConstant       (Token &Result, const char *CurPtr,
+  bool LexAngledStringLiteral(Token &Result, const char *CurPtr);
+  bool LexCharConstant       (Token &Result, const char *CurPtr,
                               tok::TokenKind Kind);
   bool LexEndOfFile          (Token &Result, const char *CurPtr);
-
-  bool SkipWhitespace        (Token &Result, const char *CurPtr);
-  bool SkipLineComment       (Token &Result, const char *CurPtr);
-  bool SkipBlockComment      (Token &Result, const char *CurPtr);
+  bool SkipWhitespace        (Token &Result, const char *CurPtr,
+                              bool &TokAtPhysicalStartOfLine);
+  bool SkipLineComment       (Token &Result, const char *CurPtr,
+                              bool &TokAtPhysicalStartOfLine);
+  bool SkipBlockComment      (Token &Result, const char *CurPtr,
+                              bool &TokAtPhysicalStartOfLine);
   bool SaveLineComment       (Token &Result, const char *CurPtr);
   
   bool IsStartOfConflictMarker(const char *CurPtr);
diff --git a/include/clang/Lex/PTHLexer.h b/include/clang/Lex/PTHLexer.h
index a9276e8..d748bc1 100644
--- a/include/clang/Lex/PTHLexer.h
+++ b/include/clang/Lex/PTHLexer.h
@@ -68,7 +68,7 @@ public:
   ~PTHLexer() {}
 
   /// Lex - Return the next token.
-  void Lex(Token &Tok);
+  bool Lex(Token &Tok);
 
   void getEOF(Token &Tok);
 
diff --git a/include/clang/Lex/Preprocessor.h b/include/clang/Lex/Preprocessor.h
index f197633..52a0d53 100644
--- a/include/clang/Lex/Preprocessor.h
+++ b/include/clang/Lex/Preprocessor.h
@@ -719,13 +719,28 @@ public:
   /// Lex - To lex a token from the preprocessor, just pull a token from the
   /// current lexer or macro object.
   void Lex(Token &Result) {
-    switch (CurLexerKind) {
-    case CLK_Lexer: CurLexer->Lex(Result); break;
-    case CLK_PTHLexer: CurPTHLexer->Lex(Result); break;
-    case CLK_TokenLexer: CurTokenLexer->Lex(Result); break;
-    case CLK_CachingLexer: CachingLex(Result); break;
-    case CLK_LexAfterModuleImport: LexAfterModuleImport(Result); break;
-    }
+    bool ReturnedToken;
+    do {
+      switch (CurLexerKind) {
+      case CLK_Lexer:
+        ReturnedToken = CurLexer->Lex(Result);
+        break;
+      case CLK_PTHLexer:
+        ReturnedToken = CurPTHLexer->Lex(Result);
+        break;
+      case CLK_TokenLexer:
+        ReturnedToken = CurTokenLexer->Lex(Result);
+        break;
+      case CLK_CachingLexer:
+        CachingLex(Result);
+        ReturnedToken = true;
+        break;
+      case CLK_LexAfterModuleImport:
+        LexAfterModuleImport(Result);
+        ReturnedToken = true;
+        break;
+      }
+    } while (!ReturnedToken);
   }
 
   void LexAfterModuleImport(Token &Result);
@@ -1168,7 +1183,10 @@ public:
   /// identifier and has filled in the tokens IdentifierInfo member.  This
   /// callback potentially macro expands it or turns it into a named token (like
   /// 'for').
-  void HandleIdentifier(Token &Identifier);
+  ///
+  /// \returns true if we actually computed a token, false if we need to
+  /// lex again.
+  bool HandleIdentifier(Token &Identifier);
 
 
   /// HandleEndOfFile - This callback is invoked when the lexer hits the end of
@@ -1286,6 +1304,8 @@ private:
     IncludeMacroStack.pop_back();
   }
 
+  void PropagateLineStartLeadingSpaceInfo(Token &Result);
+
   /// \brief Allocate a new MacroInfo object.
   MacroInfo *AllocateMacroInfo();
 
@@ -1342,7 +1362,7 @@ private:
 
   /// HandleMacroExpandedIdentifier - If an identifier token is read that is to
   /// be expanded as a macro, handle it and return the next token as 'Tok'.  If
-  /// the macro should not be expanded return true, otherwise return false.
+  /// we lexed a token, return true; otherwise the caller should lex again.
   bool HandleMacroExpandedIdentifier(Token &Tok, MacroDirective *MD);
 
   /// \brief Cache macro expanded tokens for TokenLexers.
diff --git a/include/clang/Lex/TokenLexer.h b/include/clang/Lex/TokenLexer.h
index 090402a..7c8cfd0 100644
--- a/include/clang/Lex/TokenLexer.h
+++ b/include/clang/Lex/TokenLexer.h
@@ -136,7 +136,7 @@ public:
   unsigned isNextTokenLParen() const;
 
   /// Lex - Lex and return a token from this macro stream.
-  void Lex(Token &Tok);
+  bool Lex(Token &Tok);
 
   /// isParsingPreprocessorDirective - Return true if we are in the middle of a
   /// preprocessor directive.
@@ -181,6 +181,8 @@ private:
   /// macro definition.
   void updateLocForMacroArgTokens(SourceLocation ArgIdSpellLoc,
                                   Token *begin_tokens, Token *end_tokens);
+
+  void PropagateLineStartLeadingSpaceInfo(Token &Result);
 };
 
 }  // end namespace clang
diff --git a/lib/Frontend/VerifyDiagnosticConsumer.cpp b/lib/Frontend/VerifyDiagnosticConsumer.cpp
index f555529..045e60a 100644
--- a/lib/Frontend/VerifyDiagnosticConsumer.cpp
+++ b/lib/Frontend/VerifyDiagnosticConsumer.cpp
@@ -555,7 +555,7 @@ static bool findDirectives(SourceManager &SM, FileID FID,
   VerifyDiagnosticConsumer::DirectiveStatus Status =
     VerifyDiagnosticConsumer::HasNoDirectives;
   while (Tok.isNot(tok::eof)) {
-    RawLex.Lex(Tok);
+    RawLex.LexFromRawLexer(Tok);
     if (!Tok.is(tok::comment)) continue;
 
     std::string Comment = RawLex.getSpelling(Tok, SM, LangOpts);
diff --git a/lib/Lex/Lexer.cpp b/lib/Lex/Lexer.cpp
index 7d2869b..6d0153e 100644
--- a/lib/Lex/Lexer.cpp
+++ b/lib/Lex/Lexer.cpp
@@ -93,6 +93,10 @@ void Lexer::InitLexer(const char *BufStart, const char *BufPtr,
 
   // Start of the file is a start of line.
   IsAtStartOfLine = true;
+  IsAtPhysicalStartOfLine = true;
+
+  HasLeadingSpace = false;
+  HasLeadingEmptyMacro = false;
 
   // We are not after parsing a #.
   ParsingPreprocessorDirective = false;
@@ -1361,7 +1365,11 @@ void Lexer::SkipBytes(unsigned Bytes, bool StartOfLine) {
   BufferPtr += Bytes;
   if (BufferPtr > BufferEnd)
     BufferPtr = BufferEnd;
+  // FIXME: What exactly does the StartOfLine bit mean?  There are two
+  // possible meanings for the "start" of the line: the first token on the
+  // unexpanded line, or the first token on the expanded line.
   IsAtStartOfLine = StartOfLine;
+  IsAtPhysicalStartOfLine = StartOfLine;
 }
 
 static bool isAllowedIDChar(uint32_t C, const LangOptions &LangOpts) {
@@ -1438,7 +1446,7 @@ static void maybeDiagnoseIDCharCompat(DiagnosticsEngine &Diags, uint32_t C,
   }
  }
 
-void Lexer::LexIdentifier(Token &Result, const char *CurPtr) {
+bool Lexer::LexIdentifier(Token &Result, const char *CurPtr) {
   // Match [_A-Za-z0-9]*, we have already matched [_A-Za-z$]
   unsigned Size;
   unsigned char C = *CurPtr++;
@@ -1462,7 +1470,7 @@ FinishIdentifier:
     // If we are in raw mode, return this identifier raw.  There is no need to
     // look up identifier information or attempt to macro expand it.
     if (LexingRawMode)
-      return;
+      return true;
 
     // Fill in Result.IdentifierInfo and update the token kind,
     // looking up the identifier in the identifier table.
@@ -1471,9 +1479,9 @@ FinishIdentifier:
     // Finally, now that we know we have an identifier, pass this off to the
     // preprocessor, which may macro expand it or something.
     if (II->isHandleIdentifierCase())
-      PP->HandleIdentifier(Result);
+      return PP->HandleIdentifier(Result);
     
-    return;
+    return true;
   }
 
   // Otherwise, $,\,? in identifier found.  Enter slower path.
@@ -1563,7 +1571,7 @@ bool Lexer::isHexaLiteral(const char *Start, const LangOptions &LangOpts) {
 /// LexNumericConstant - Lex the remainder of a integer or floating point
 /// constant. From[-1] is the first character lexed.  Return the end of the
 /// constant.
-void Lexer::LexNumericConstant(Token &Result, const char *CurPtr) {
+bool Lexer::LexNumericConstant(Token &Result, const char *CurPtr) {
   unsigned Size;
   char C = getCharAndSize(CurPtr, Size);
   char PrevCh = 0;
@@ -1601,6 +1609,7 @@ void Lexer::LexNumericConstant(Token &Result, const char *CurPtr) {
   const char *TokStart = BufferPtr;
   FormTokenWithChars(Result, CurPtr, tok::numeric_constant);
   Result.setLiteralData(TokStart);
+  return true;
 }
 
 /// LexUDSuffix - Lex the ud-suffix production for user-defined literal suffixes
@@ -1657,7 +1666,7 @@ const char *Lexer::LexUDSuffix(Token &Result, const char *CurPtr,
 
 /// LexStringLiteral - Lex the remainder of a string literal, after having lexed
 /// either " or L" or u8" or u" or U".
-void Lexer::LexStringLiteral(Token &Result, const char *CurPtr,
+bool Lexer::LexStringLiteral(Token &Result, const char *CurPtr,
                              tok::TokenKind Kind) {
   const char *NulCharacter = 0; // Does this string contain the \0 character?
 
@@ -1681,14 +1690,15 @@ void Lexer::LexStringLiteral(Token &Result, const char *CurPtr,
       if (!isLexingRawMode() && !LangOpts.AsmPreprocessor)
         Diag(BufferPtr, diag::ext_unterminated_string);
       FormTokenWithChars(Result, CurPtr-1, tok::unknown);
-      return;
+      return true;
     }
     
     if (C == 0) {
       if (isCodeCompletionPoint(CurPtr-1)) {
         PP->CodeCompleteNaturalLanguage();
         FormTokenWithChars(Result, CurPtr-1, tok::unknown);
-        return cutOffLexing();
+        cutOffLexing();
+        return true;
       }
 
       NulCharacter = CurPtr-1;
@@ -1708,11 +1718,12 @@ void Lexer::LexStringLiteral(Token &Result, const char *CurPtr,
   const char *TokStart = BufferPtr;
   FormTokenWithChars(Result, CurPtr, Kind);
   Result.setLiteralData(TokStart);
+  return true;
 }
 
 /// LexRawStringLiteral - Lex the remainder of a raw string literal, after
 /// having lexed R", LR", u8R", uR", or UR".
-void Lexer::LexRawStringLiteral(Token &Result, const char *CurPtr,
+bool Lexer::LexRawStringLiteral(Token &Result, const char *CurPtr,
                                 tok::TokenKind Kind) {
   // This function doesn't use getAndAdvanceChar because C++0x [lex.pptoken]p3:
   //  Between the initial and final double quote characters of the raw string,
@@ -1754,7 +1765,7 @@ void Lexer::LexRawStringLiteral(Token &Result, const char *CurPtr,
     }
 
     FormTokenWithChars(Result, CurPtr, tok::unknown);
-    return;
+    return true;
   }
 
   // Save prefix and move CurPtr past it
@@ -1775,7 +1786,7 @@ void Lexer::LexRawStringLiteral(Token &Result, const char *CurPtr,
         Diag(BufferPtr, diag::err_unterminated_raw_string)
           << StringRef(Prefix, PrefixLen);
       FormTokenWithChars(Result, CurPtr-1, tok::unknown);
-      return;
+      return true;
     }
   }
 
@@ -1787,11 +1798,12 @@ void Lexer::LexRawStringLiteral(Token &Result, const char *CurPtr,
   const char *TokStart = BufferPtr;
   FormTokenWithChars(Result, CurPtr, Kind);
   Result.setLiteralData(TokStart);
+  return true;
 }
 
 /// LexAngledStringLiteral - Lex the remainder of an angled string literal,
 /// after having lexed the '<' character.  This is used for #include filenames.
-void Lexer::LexAngledStringLiteral(Token &Result, const char *CurPtr) {
+bool Lexer::LexAngledStringLiteral(Token &Result, const char *CurPtr) {
   const char *NulCharacter = 0; // Does this string contain the \0 character?
   const char *AfterLessPos = CurPtr;
   char C = getAndAdvanceChar(CurPtr, Result);
@@ -1806,7 +1818,7 @@ void Lexer::LexAngledStringLiteral(Token &Result, const char *CurPtr) {
       // If the filename is unterminated, then it must just be a lone <
       // character.  Return this as such.
       FormTokenWithChars(Result, AfterLessPos, tok::less);
-      return;
+      return true;
     } else if (C == 0) {
       NulCharacter = CurPtr-1;
     }
@@ -1821,12 +1833,13 @@ void Lexer::LexAngledStringLiteral(Token &Result, const char *CurPtr) {
   const char *TokStart = BufferPtr;
   FormTokenWithChars(Result, CurPtr, tok::angle_string_literal);
   Result.setLiteralData(TokStart);
+  return true;
 }
 
 
 /// LexCharConstant - Lex the remainder of a character constant, after having
 /// lexed either ' or L' or u' or U'.
-void Lexer::LexCharConstant(Token &Result, const char *CurPtr,
+bool Lexer::LexCharConstant(Token &Result, const char *CurPtr,
                             tok::TokenKind Kind) {
   const char *NulCharacter = 0; // Does this character contain the \0 character?
 
@@ -1841,7 +1854,7 @@ void Lexer::LexCharConstant(Token &Result, const char *CurPtr,
     if (!isLexingRawMode() && !LangOpts.AsmPreprocessor)
       Diag(BufferPtr, diag::ext_empty_character);
     FormTokenWithChars(Result, CurPtr, tok::unknown);
-    return;
+    return true;
   }
 
   while (C != '\'') {
@@ -1854,14 +1867,15 @@ void Lexer::LexCharConstant(Token &Result, const char *CurPtr,
       if (!isLexingRawMode() && !LangOpts.AsmPreprocessor)
         Diag(BufferPtr, diag::ext_unterminated_char);
       FormTokenWithChars(Result, CurPtr-1, tok::unknown);
-      return;
+      return true;
     }
 
     if (C == 0) {
       if (isCodeCompletionPoint(CurPtr-1)) {
         PP->CodeCompleteNaturalLanguage();
         FormTokenWithChars(Result, CurPtr-1, tok::unknown);
-        return cutOffLexing();
+        cutOffLexing();
+        return true;
       }
 
       NulCharacter = CurPtr-1;
@@ -1881,6 +1895,7 @@ void Lexer::LexCharConstant(Token &Result, const char *CurPtr,
   const char *TokStart = BufferPtr;
   FormTokenWithChars(Result, CurPtr, Kind);
   Result.setLiteralData(TokStart);
+  return true;
 }
 
 /// SkipWhitespace - Efficiently skip over a series of whitespace characters.
@@ -1888,7 +1903,8 @@ void Lexer::LexCharConstant(Token &Result, const char *CurPtr,
 ///
 /// This method forms a token and returns true if KeepWhitespaceMode is enabled.
 ///
-bool Lexer::SkipWhitespace(Token &Result, const char *CurPtr) {
+bool Lexer::SkipWhitespace(Token &Result, const char *CurPtr,
+                           bool &TokAtPhysicalStartOfLine) {
   // Whitespace - Skip it, then return the token after the whitespace.
   bool SawNewline = isVerticalWhitespace(CurPtr[-1]);
 
@@ -1918,8 +1934,10 @@ bool Lexer::SkipWhitespace(Token &Result, const char *CurPtr) {
   // If the client wants us to return whitespace, return it now.
   if (isKeepWhitespaceMode()) {
     FormTokenWithChars(Result, CurPtr, tok::unknown);
-    if (SawNewline)
+    if (SawNewline) {
       IsAtStartOfLine = true;
+      IsAtPhysicalStartOfLine = true;
+    }
     // FIXME: The next token will not have LeadingSpace set.
     return true;
   }
@@ -1929,8 +1947,10 @@ bool Lexer::SkipWhitespace(Token &Result, const char *CurPtr) {
   bool HasLeadingSpace = !isVerticalWhitespace(PrevChar);
 
   Result.setFlagValue(Token::LeadingSpace, HasLeadingSpace);
-  if (SawNewline)
+  if (SawNewline) {
     Result.setFlag(Token::StartOfLine);
+    TokAtPhysicalStartOfLine = true;
+  }
 
   BufferPtr = CurPtr;
   return false;
@@ -1942,7 +1962,8 @@ bool Lexer::SkipWhitespace(Token &Result, const char *CurPtr) {
 ///
 /// If we're in KeepCommentMode or any CommentHandler has inserted
 /// some tokens, this will store the first token and return true.
-bool Lexer::SkipLineComment(Token &Result, const char *CurPtr) {
+bool Lexer::SkipLineComment(Token &Result, const char *CurPtr,
+                            bool &TokAtPhysicalStartOfLine) {
   // If Line comments aren't explicitly enabled for this language, emit an
   // extension warning.
   if (!LangOpts.LineComment && !isLexingRawMode()) {
@@ -2061,6 +2082,7 @@ bool Lexer::SkipLineComment(Token &Result, const char *CurPtr) {
 
   // The next returned token is at the start of the line.
   Result.setFlag(Token::StartOfLine);
+  TokAtPhysicalStartOfLine = true;
   // No leading whitespace seen so far.
   Result.clearFlag(Token::LeadingSpace);
   BufferPtr = CurPtr;
@@ -2171,7 +2193,8 @@ static bool isEndOfBlockCommentWithEscapedNewLine(const char *CurPtr,
 ///
 /// If we're in KeepCommentMode or any CommentHandler has inserted
 /// some tokens, this will store the first token and return true.
-bool Lexer::SkipBlockComment(Token &Result, const char *CurPtr) {
+bool Lexer::SkipBlockComment(Token &Result, const char *CurPtr,
+                             bool &TokAtPhysicalStartOfLine) {
   // Scan one character past where we should, looking for a '/' character.  Once
   // we find it, check to see if it was preceded by a *.  This common
   // optimization helps people who like to put a lot of * characters in their
@@ -2322,7 +2345,7 @@ bool Lexer::SkipBlockComment(Token &Result, const char *CurPtr) {
   // efficiently now.  This is safe even in KeepWhitespaceMode because we would
   // have already returned above with the comment as a token.
   if (isHorizontalWhitespace(*CurPtr)) {
-    SkipWhitespace(Result, CurPtr+1);
+    SkipWhitespace(Result, CurPtr+1, TokAtPhysicalStartOfLine);
     return false;
   }
 
@@ -2472,14 +2495,19 @@ unsigned Lexer::isNextPPTokenLParen() {
   // Save state that can be changed while lexing so that we can restore it.
   const char *TmpBufferPtr = BufferPtr;
   bool inPPDirectiveMode = ParsingPreprocessorDirective;
+  bool atStartOfLine = IsAtStartOfLine;
+  bool atPhysicalStartOfLine = IsAtPhysicalStartOfLine;
+  bool leadingSpace = HasLeadingSpace;
 
   Token Tok;
-  Tok.startToken();
-  LexTokenInternal(Tok);
+  Lex(Tok);
 
   // Restore state that may have changed.
   BufferPtr = TmpBufferPtr;
   ParsingPreprocessorDirective = inPPDirectiveMode;
+  HasLeadingSpace = leadingSpace;
+  IsAtStartOfLine = atStartOfLine;
+  IsAtPhysicalStartOfLine = atPhysicalStartOfLine;
 
   // Restore the lexer back to non-skipping mode.
   LexingRawMode = false;
@@ -2712,7 +2740,8 @@ uint32_t Lexer::tryReadUCN(const char *&StartPtr, const char *SlashLoc,
   return CodePoint;
 }
 
-void Lexer::LexUnicode(Token &Result, uint32_t C, const char *CurPtr) {
+bool Lexer::CheckUnicodeWhitespace(Token &Result, uint32_t C,
+                                   const char *CurPtr) {
   static const llvm::sys::UnicodeCharSet UnicodeWhitespaceChars(
       UnicodeWhitespaceCharRanges);
   if (!isLexingRawMode() && !PP->isPreprocessedOutput() &&
@@ -2721,12 +2750,12 @@ void Lexer::LexUnicode(Token &Result, uint32_t C, const char *CurPtr) {
       << makeCharRange(*this, BufferPtr, CurPtr);
 
     Result.setFlag(Token::LeadingSpace);
-    if (SkipWhitespace(Result, CurPtr))
-      return; // KeepWhitespaceMode
-
-    return LexTokenInternal(Result);
+    return true;
   }
+  return false;
+}
 
+bool Lexer::LexUnicode(Token &Result, uint32_t C, const char *CurPtr) {
   if (isAllowedIDChar(C, LangOpts) && isAllowedInitiallyIDChar(C, LangOpts)) {
     if (!isLexingRawMode() && !ParsingPreprocessorDirective &&
         !PP->isPreprocessedOutput()) {
@@ -2755,22 +2784,54 @@ void Lexer::LexUnicode(Token &Result, uint32_t C, const char *CurPtr) {
       << FixItHint::CreateRemoval(makeCharRange(*this, BufferPtr, CurPtr));
 
     BufferPtr = CurPtr;
-    return LexTokenInternal(Result);
+    return false;
   }
 
   // Otherwise, we have an explicit UCN or a character that's unlikely to show
   // up by accident.
   MIOpt.ReadToken();
   FormTokenWithChars(Result, CurPtr, tok::unknown);
+  return true;
+}
+
+void Lexer::PropagateLineStartLeadingSpaceInfo(Token &Result) {
+  IsAtStartOfLine = Result.isAtStartOfLine();
+  HasLeadingSpace = Result.hasLeadingSpace();
+  HasLeadingEmptyMacro = Result.hasLeadingEmptyMacro();
+  // Note that this doesn't affect IsAtPhysicalStartOfLine.
 }
 
+bool Lexer::Lex(Token &Result) {
+  // Start a new token.
+  Result.startToken();
+
+  // Set up misc whitespace flags for LexTokenInternal.
+  if (IsAtStartOfLine) {
+    Result.setFlag(Token::StartOfLine);
+    IsAtStartOfLine = false;
+  }
+
+  if (HasLeadingSpace) {
+    Result.setFlag(Token::LeadingSpace);
+    HasLeadingSpace = false;
+  }
+
+  if (HasLeadingEmptyMacro) {
+    Result.setFlag(Token::LeadingEmptyMacro);
+    HasLeadingEmptyMacro = false;
+  }
+
+  bool atPhysicalStartOfLine = IsAtPhysicalStartOfLine;
+  IsAtPhysicalStartOfLine = false;
+  return LexTokenInternal(Result, atPhysicalStartOfLine);
+}
 
 /// LexTokenInternal - This implements a simple C family lexer.  It is an
 /// extremely performance critical piece of code.  This assumes that the buffer
 /// has a null character at the end of the file.  This returns a preprocessing
 /// token, not a normal token, as such, it is an internal interface.  It assumes
 /// that the Flags of result have been cleared before calling this.
-void Lexer::LexTokenInternal(Token &Result) {
+bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) {
 LexNextToken:
   // New token, can't need cleaning yet.
   Result.clearFlag(Token::NeedsCleaning);
@@ -2791,7 +2852,7 @@ LexNextToken:
     if (isKeepWhitespaceMode()) {
       FormTokenWithChars(Result, CurPtr, tok::unknown);
       // FIXME: The next token will not have LeadingSpace set.
-      return;
+      return true;
     }
 
     BufferPtr = CurPtr;
@@ -2807,43 +2868,32 @@ LexNextToken:
   switch (Char) {
   case 0:  // Null.
     // Found end of file?
-    if (CurPtr-1 == BufferEnd) {
-      // Read the PP instance variable into an automatic variable, because
-      // LexEndOfFile will often delete 'this'.
-      Preprocessor *PPCache = PP;
-      if (LexEndOfFile(Result, CurPtr-1))  // Retreat back into the file.
-        return;   // Got a token to return.
-      assert(PPCache && "Raw buffer::LexEndOfFile should return a token");
-      return PPCache->Lex(Result);
-    }
+    if (CurPtr-1 == BufferEnd)
+      return LexEndOfFile(Result, CurPtr-1);
 
     // Check if we are performing code completion.
     if (isCodeCompletionPoint(CurPtr-1)) {
       // Return the code-completion token.
       Result.startToken();
       FormTokenWithChars(Result, CurPtr, tok::code_completion);
-      return;
+      return true;
     }
 
     if (!isLexingRawMode())
       Diag(CurPtr-1, diag::null_in_file);
     Result.setFlag(Token::LeadingSpace);
-    if (SkipWhitespace(Result, CurPtr))
-      return; // KeepWhitespaceMode
+    if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine))
+      return true; // KeepWhitespaceMode
 
-    goto LexNextToken;   // GCC isn't tail call eliminating.
+    // We know the lexer hasn't changed, so just try again with this lexer.
+    // (We manually eliminate the tail call to avoid recursion.)
+    goto LexNextToken;
       
   case 26:  // DOS & CP/M EOF: "^Z".
     // If we're in Microsoft extensions mode, treat this as end of file.
-    if (LangOpts.MicrosoftExt) {
-      // Read the PP instance variable into an automatic variable, because
-      // LexEndOfFile will often delete 'this'.
-      Preprocessor *PPCache = PP;
-      if (LexEndOfFile(Result, CurPtr-1))  // Retreat back into the file.
-        return;   // Got a token to return.
-      assert(PPCache && "Raw buffer::LexEndOfFile should return a token");
-      return PPCache->Lex(Result);
-    }
+    if (LangOpts.MicrosoftExt)
+      return LexEndOfFile(Result, CurPtr-1);
+
     // If Microsoft extensions are disabled, this is just random garbage.
     Kind = tok::unknown;
     break;
@@ -2862,6 +2912,7 @@ LexNextToken:
 
       // Since we consumed a newline, we are back at the start of a line.
       IsAtStartOfLine = true;
+      IsAtPhysicalStartOfLine = true;
 
       Kind = tok::eod;
       break;
@@ -2870,17 +2921,20 @@ LexNextToken:
     // No leading whitespace seen so far.
     Result.clearFlag(Token::LeadingSpace);
 
-    if (SkipWhitespace(Result, CurPtr))
-      return; // KeepWhitespaceMode
-    goto LexNextToken;   // GCC isn't tail call eliminating.
+    if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine))
+      return true; // KeepWhitespaceMode
+
+    // We only saw whitespace, so just try again with this lexer.
+    // (We manually eliminate the tail call to avoid recursion.)
+    goto LexNextToken;
   case ' ':
   case '\t':
   case '\f':
   case '\v':
   SkipHorizontalWhitespace:
     Result.setFlag(Token::LeadingSpace);
-    if (SkipWhitespace(Result, CurPtr))
-      return; // KeepWhitespaceMode
+    if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine))
+      return true; // KeepWhitespaceMode
 
   SkipIgnoredUnits:
     CurPtr = BufferPtr;
@@ -2890,17 +2944,19 @@ LexNextToken:
     if (CurPtr[0] == '/' && CurPtr[1] == '/' && !inKeepCommentMode() &&
         LangOpts.LineComment &&
         (LangOpts.CPlusPlus || !LangOpts.TraditionalCPP)) {
-      if (SkipLineComment(Result, CurPtr+2))
-        return; // There is a token to return.
+      if (SkipLineComment(Result, CurPtr+2, TokAtPhysicalStartOfLine))
+        return true; // There is a token to return.
       goto SkipIgnoredUnits;
     } else if (CurPtr[0] == '/' && CurPtr[1] == '*' && !inKeepCommentMode()) {
-      if (SkipBlockComment(Result, CurPtr+2))
-        return; // There is a token to return.
+      if (SkipBlockComment(Result, CurPtr+2, TokAtPhysicalStartOfLine))
+        return true; // There is a token to return.
       goto SkipIgnoredUnits;
     } else if (isHorizontalWhitespace(*CurPtr)) {
       goto SkipHorizontalWhitespace;
     }
-    goto LexNextToken;   // GCC isn't tail call eliminating.
+    // We only saw whitespace, so just try again with this lexer.
+    // (We manually eliminate the tail call to avoid recursion.)
+    goto LexNextToken;
       
   // C99 6.4.4.1: Integer Constants.
   // C99 6.4.4.2: Floating Constants.
@@ -3193,8 +3249,9 @@ LexNextToken:
           TreatAsComment = getCharAndSize(CurPtr+SizeTmp, SizeTmp2) != '*';
 
       if (TreatAsComment) {
-        if (SkipLineComment(Result, ConsumeChar(CurPtr, SizeTmp, Result)))
-          return; // There is a token to return.
+        if (SkipLineComment(Result, ConsumeChar(CurPtr, SizeTmp, Result),
+                            TokAtPhysicalStartOfLine))
+          return true; // There is a token to return.
 
         // It is common for the tokens immediately after a // comment to be
         // whitespace (indentation for the next line).  Instead of going through
@@ -3204,9 +3261,13 @@ LexNextToken:
     }
 
     if (Char == '*') {  // /**/ comment.
-      if (SkipBlockComment(Result, ConsumeChar(CurPtr, SizeTmp, Result)))
-        return; // There is a token to return.
-      goto LexNextToken;   // GCC isn't tail call eliminating.
+      if (SkipBlockComment(Result, ConsumeChar(CurPtr, SizeTmp, Result),
+                           TokAtPhysicalStartOfLine))
+        return true; // There is a token to return.
+
+      // We only saw whitespace, so just try again with this lexer.
+      // (We manually eliminate the tail call to avoid recursion.)
+      goto LexNextToken;
     }
 
     if (Char == '=') {
@@ -3241,7 +3302,7 @@ LexNextToken:
         // it's actually the start of a preprocessing directive.  Callback to
         // the preprocessor to handle it.
         // FIXME: -fpreprocessed mode??
-        if (Result.isAtStartOfLine() && !LexingRawMode && !Is_PragmaLexer)
+        if (TokAtPhysicalStartOfLine && !LexingRawMode && !Is_PragmaLexer)
           goto HandleDirective;
 
         Kind = tok::hash;
@@ -3407,7 +3468,7 @@ LexNextToken:
       // it's actually the start of a preprocessing directive.  Callback to
       // the preprocessor to handle it.
       // FIXME: -fpreprocessed mode??
-      if (Result.isAtStartOfLine() && !LexingRawMode && !Is_PragmaLexer)
+      if (TokAtPhysicalStartOfLine && !LexingRawMode && !Is_PragmaLexer)
         goto HandleDirective;
 
       Kind = tok::hash;
@@ -3424,8 +3485,18 @@ LexNextToken:
 
   // UCNs (C99 6.4.3, C++11 [lex.charset]p2)
   case '\\':
-    if (uint32_t CodePoint = tryReadUCN(CurPtr, BufferPtr, &Result))
+    if (uint32_t CodePoint = tryReadUCN(CurPtr, BufferPtr, &Result)) {
+      if (CheckUnicodeWhitespace(Result, CodePoint, CurPtr)) {
+        if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine))
+          return true; // KeepWhitespaceMode
+
+        // We only saw whitespace, so just try again with this lexer.
+        // (We manually eliminate the tail call to avoid recursion.)
+        goto LexNextToken;
+      }
+
       return LexUnicode(Result, CodePoint, CurPtr);
+    }
 
     Kind = tok::unknown;
     break;
@@ -3446,8 +3517,17 @@ LexNextToken:
                                   (const UTF8 *)BufferEnd,
                                   &CodePoint,
                                   strictConversion);
-    if (Status == conversionOK)
+    if (Status == conversionOK) {
+      if (CheckUnicodeWhitespace(Result, CodePoint, CurPtr)) {
+        if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine))
+          return true; // KeepWhitespaceMode
+
+        // We only saw whitespace, so just try again with this lexer.
+        // (We manually eliminate the tail call to avoid recursion.)
+        goto LexNextToken;
+      }
       return LexUnicode(Result, CodePoint, CurPtr);
+    }
     
     if (isLexingRawMode() || ParsingPreprocessorDirective ||
         PP->isPreprocessedOutput()) {
@@ -3462,6 +3542,9 @@ LexNextToken:
     Diag(CurPtr, diag::err_invalid_utf8);
 
     BufferPtr = CurPtr+1;
+    // We're pretending the character didn't exist, so just try again with
+    // this lexer.
+    // (We manually eliminate the tail call to avoid recursion.)
     goto LexNextToken;
   }
   }
@@ -3471,7 +3554,7 @@ LexNextToken:
 
   // Update the location of token as well as BufferPtr.
   FormTokenWithChars(Result, CurPtr, Kind);
-  return;
+  return true;
 
 HandleDirective:
   // We parsed a # character and it's the start of a preprocessing directive.
@@ -3482,21 +3565,9 @@ HandleDirective:
   if (PP->hadModuleLoaderFatalFailure()) {
     // With a fatal failure in the module loader, we abort parsing.
     assert(Result.is(tok::eof) && "Preprocessor did not set tok:eof");
-    return;
+    return true;
   }
 
-  // As an optimization, if the preprocessor didn't switch lexers, tail
-  // recurse.
-  if (PP->isCurrentLexer(this)) {
-    // Start a new token.  If this is a #include or something, the PP may
-    // want us starting at the beginning of the line again.  If so, set
-    // the StartOfLine flag and clear LeadingSpace.
-    if (IsAtStartOfLine) {
-      Result.setFlag(Token::StartOfLine);
-      Result.clearFlag(Token::LeadingSpace);
-      IsAtStartOfLine = false;
-    }
-    goto LexNextToken;   // GCC isn't tail call eliminating.
-  }
-  return PP->Lex(Result);
+  // We parsed the directive; lex a token with the new state.
+  return false;
 }
diff --git a/lib/Lex/PPLexerChange.cpp b/lib/Lex/PPLexerChange.cpp
index e521032..1193c3c 100644
--- a/lib/Lex/PPLexerChange.cpp
+++ b/lib/Lex/PPLexerChange.cpp
@@ -231,6 +231,19 @@ static void computeRelativePath(FileManager &FM, const DirectoryEntry *Dir,
   Result = File->getName();
 }
 
+void Preprocessor::PropagateLineStartLeadingSpaceInfo(Token &Result) {
+  if (CurTokenLexer) {
+    CurTokenLexer->PropagateLineStartLeadingSpaceInfo(Result);
+    return;
+  }
+  if (CurLexer) {
+    CurLexer->PropagateLineStartLeadingSpaceInfo(Result);
+    return;
+  }
+  // FIXME: Handle other kinds of lexers?  It generally shouldn't matter,
+  // but it might if they're empty?
+}
+
 /// HandleEndOfFile - This callback is invoked when the lexer hits the end of
 /// the current file.  This either returns the EOF token or pops a level off
 /// the include stack and keeps going.
@@ -320,6 +333,9 @@ bool Preprocessor::HandleEndOfFile(Token &Result, bool isEndOfMacro) {
     // We're done with the #included file.
     RemoveTopOfLexerStack();
 
+    // Propagate info about start-of-line/leading white-space/etc.
+    PropagateLineStartLeadingSpaceInfo(Result);
+
     // Notify the client, if desired, that we are in a new source file.
     if (Callbacks && !isEndOfMacro && CurPPLexer) {
       SrcMgr::CharacteristicKind FileType =
diff --git a/lib/Lex/PPMacroExpansion.cpp b/lib/Lex/PPMacroExpansion.cpp
index 0eb1169..e67a6b2 100644
--- a/lib/Lex/PPMacroExpansion.cpp
+++ b/lib/Lex/PPMacroExpansion.cpp
@@ -225,7 +225,7 @@ bool Preprocessor::HandleMacroExpandedIdentifier(Token &Identifier,
     if (Callbacks) Callbacks->MacroExpands(Identifier, MD,
                                            Identifier.getLocation(),/*Args=*/0);
     ExpandBuiltinMacro(Identifier);
-    return false;
+    return true;
   }
 
   /// Args - If this is a function-like macro expansion, this contains,
@@ -239,11 +239,6 @@ bool Preprocessor::HandleMacroExpandedIdentifier(Token &Identifier,
 
   // If this is a function-like macro, read the arguments.
   if (MI->isFunctionLike()) {
-    // C99 6.10.3p10: If the preprocessing token immediately after the macro
-    // name isn't a '(', this macro should not be expanded.
-    if (!isNextPPTokenLParen())
-      return true;
-
     // Remember that we are now parsing the arguments to a macro invocation.
     // Preprocessor directives used inside macro arguments are not portable, and
     // this enables the warning.
@@ -254,7 +249,7 @@ bool Preprocessor::HandleMacroExpandedIdentifier(Token &Identifier,
     InMacroArgs = false;
 
     // If there was an error parsing the arguments, bail out.
-    if (Args == 0) return false;
+    if (Args == 0) return true;
 
     ++NumFnMacroExpanded;
   } else {
@@ -314,25 +309,12 @@ bool Preprocessor::HandleMacroExpandedIdentifier(Token &Identifier,
     // No need for arg info.
     if (Args) Args->destroy(*this);
 
-    // Ignore this macro use, just return the next token in the current
-    // buffer.
-    bool HadLeadingSpace = Identifier.hasLeadingSpace();
-    bool IsAtStartOfLine = Identifier.isAtStartOfLine();
-
-    Lex(Identifier);
-
-    // If the identifier isn't on some OTHER line, inherit the leading
-    // whitespace/first-on-a-line property of this token.  This handles
-    // stuff like "! XX," -> "! ," and "   XX," -> "    ,", when XX is
-    // empty.
-    if (!Identifier.isAtStartOfLine()) {
-      if (IsAtStartOfLine) Identifier.setFlag(Token::StartOfLine);
-      if (HadLeadingSpace) Identifier.setFlag(Token::LeadingSpace);
-    }
+    // Propagate whitespace info as if we had pushed, then popped,
+    // a macro context.
     Identifier.setFlag(Token::LeadingEmptyMacro);
+    PropagateLineStartLeadingSpaceInfo(Identifier);
     ++NumFastMacroExpanded;
     return false;
-
   } else if (MI->getNumTokens() == 1 &&
              isTrivialSingleTokenExpansion(MI, Identifier.getIdentifierInfo(),
                                            *this)) {
@@ -378,15 +360,11 @@ bool Preprocessor::HandleMacroExpandedIdentifier(Token &Identifier,
     // Since this is not an identifier token, it can't be macro expanded, so
     // we're done.
     ++NumFastMacroExpanded;
-    return false;
+    return true;
   }
 
   // Start expanding the macro.
   EnterMacro(Identifier, ExpansionEnd, MI, Args);
-
-  // Now that the macro is at the top of the include stack, ask the
-  // preprocessor to read the next token from it.
-  Lex(Identifier);
   return false;
 }
 
diff --git a/lib/Lex/PTHLexer.cpp b/lib/Lex/PTHLexer.cpp
index 32da92a..e2629a3 100644
--- a/lib/Lex/PTHLexer.cpp
+++ b/lib/Lex/PTHLexer.cpp
@@ -43,9 +43,7 @@ PTHLexer::PTHLexer(Preprocessor &PP, FileID FID, const unsigned char *D,
   FileStartLoc = PP.getSourceManager().getLocForStartOfFile(FID);
 }
 
-void PTHLexer::Lex(Token& Tok) {
-LexNextToken:
-
+bool PTHLexer::Lex(Token& Tok) {
   //===--------------------------------------==//
   // Read the raw token data.
   //===--------------------------------------==//
@@ -90,8 +88,9 @@ LexNextToken:
     Tok.setKind(II->getTokenID());
 
     if (II->isHandleIdentifierCase())
-      PP->HandleIdentifier(Tok);
-    return;
+      return PP->HandleIdentifier(Tok);
+
+    return true;
   }
 
   //===--------------------------------------==//
@@ -101,16 +100,10 @@ LexNextToken:
     // Save the end-of-file token.
     EofToken = Tok;
 
-    // Save 'PP' to 'PPCache' as LexEndOfFile can delete 'this'.
-    Preprocessor *PPCache = PP;
-
     assert(!ParsingPreprocessorDirective);
     assert(!LexingRawMode);
-    
-    if (LexEndOfFile(Tok))
-      return;
 
-    return PPCache->Lex(Tok);
+    return LexEndOfFile(Tok);
   }
 
   if (TKind == tok::hash && Tok.isAtStartOfLine()) {
@@ -118,19 +111,17 @@ LexNextToken:
     assert(!LexingRawMode);
     PP->HandleDirective(Tok);
 
-    if (PP->isCurrentLexer(this))
-      goto LexNextToken;
-
-    return PP->Lex(Tok);
+    return false;
   }
 
   if (TKind == tok::eod) {
     assert(ParsingPreprocessorDirective);
     ParsingPreprocessorDirective = false;
-    return;
+    return true;
   }
 
   MIOpt.ReadToken();
+  return true;
 }
 
 bool PTHLexer::LexEndOfFile(Token &Result) {
diff --git a/lib/Lex/Preprocessor.cpp b/lib/Lex/Preprocessor.cpp
index 035f751..ed6a5e4 100644
--- a/lib/Lex/Preprocessor.cpp
+++ b/lib/Lex/Preprocessor.cpp
@@ -614,7 +614,7 @@ void Preprocessor::HandlePoisonedIdentifier(Token & Identifier) {
 /// IdentifierInfo's 'isHandleIdentifierCase' bit.  If this method changes, the
 /// IdentifierInfo methods that compute these properties will need to change to
 /// match.
-void Preprocessor::HandleIdentifier(Token &Identifier) {
+bool Preprocessor::HandleIdentifier(Token &Identifier) {
   assert(Identifier.getIdentifierInfo() &&
          "Can't handle identifiers without identifier info!");
 
@@ -648,8 +648,10 @@ void Preprocessor::HandleIdentifier(Token &Identifier) {
     MacroInfo *MI = MD->getMacroInfo();
     if (!DisableMacroExpansion) {
       if (!Identifier.isExpandDisabled() && MI->isEnabled()) {
-        if (!HandleMacroExpandedIdentifier(Identifier, MD))
-          return;
+        // C99 6.10.3p10: If the preprocessing token immediately after the
+        // macro name isn't a '(', this macro should not be expanded.
+        if (!MI->isFunctionLike() || isNextPPTokenLParen())
+          return HandleMacroExpandedIdentifier(Identifier, MD);
       } else {
         // C99 6.10.3.4p2 says that a disabled macro may never again be
         // expanded, even if it's in a context where it could be expanded in the
@@ -698,6 +700,7 @@ void Preprocessor::HandleIdentifier(Token &Identifier) {
     ModuleImportExpectsIdentifier = true;
     CurLexerKind = CLK_LexAfterModuleImport;
   }
+  return true;
 }
 
 /// \brief Lex a token following the 'import' contextual keyword.
diff --git a/lib/Lex/PreprocessorLexer.cpp b/lib/Lex/PreprocessorLexer.cpp
index 5a59849..33ccbc0 100644
--- a/lib/Lex/PreprocessorLexer.cpp
+++ b/lib/Lex/PreprocessorLexer.cpp
@@ -38,7 +38,10 @@ void PreprocessorLexer::LexIncludeFilename(Token &FilenameTok) {
   ParsingFilename = true;
 
   // Lex the filename.
-  IndirectLex(FilenameTok);
+  if (LexingRawMode)
+    IndirectLex(FilenameTok);
+  else
+    PP->Lex(FilenameTok);
 
   // We should have obtained the filename now.
   ParsingFilename = false;
diff --git a/lib/Lex/TokenLexer.cpp b/lib/Lex/TokenLexer.cpp
index b83e766..0213afc 100644
--- a/lib/Lex/TokenLexer.cpp
+++ b/lib/Lex/TokenLexer.cpp
@@ -416,22 +416,19 @@ void TokenLexer::ExpandFunctionArguments() {
 
 /// Lex - Lex and return a token from this macro stream.
 ///
-void TokenLexer::Lex(Token &Tok) {
+bool TokenLexer::Lex(Token &Tok) {
   // Lexing off the end of the macro, pop this macro off the expansion stack.
   if (isAtEnd()) {
     // If this is a macro (not a token stream), mark the macro enabled now
     // that it is no longer being expanded.
     if (Macro) Macro->EnableMacro();
 
-    // Pop this context off the preprocessors lexer stack and get the next
-    // token.  This will delete "this" so remember the PP instance var.
-    Preprocessor &PPCache = PP;
-    if (PP.HandleEndOfTokenLexer(Tok))
-      return;
-
-    // HandleEndOfTokenLexer may not return a token.  If it doesn't, lex
-    // whatever is next.
-    return PPCache.Lex(Tok);
+    Tok.startToken();
+    Tok.setFlagValue(Token::StartOfLine , AtStartOfLine);
+    Tok.setFlagValue(Token::LeadingSpace, HasLeadingSpace);
+    if (CurToken == 0)
+      Tok.setFlag(Token::LeadingEmptyMacro);
+    return PP.HandleEndOfTokenLexer(Tok);
   }
 
   SourceManager &SM = PP.getSourceManager();
@@ -451,7 +448,7 @@ void TokenLexer::Lex(Token &Tok) {
     // When handling the microsoft /##/ extension, the final token is
     // returned by PasteTokens, not the pasted token.
     if (PasteTokens(Tok))
-      return;
+      return true;
 
     TokenIsFromPaste = true;
   }
@@ -482,6 +479,8 @@ void TokenLexer::Lex(Token &Tok) {
   if (isFirstToken) {
     Tok.setFlagValue(Token::StartOfLine , AtStartOfLine);
     Tok.setFlagValue(Token::LeadingSpace, HasLeadingSpace);
+    AtStartOfLine = false;
+    HasLeadingSpace = false;
   }
 
   // Handle recursive expansion!
@@ -499,10 +498,11 @@ void TokenLexer::Lex(Token &Tok) {
     }
 
     if (!DisableMacroExpansion && II->isHandleIdentifierCase())
-      PP.HandleIdentifier(Tok);
+      return PP.HandleIdentifier(Tok);
   }
 
   // Otherwise, return a normal token.
+  return true;
 }
 
 /// PasteTokens - Tok is the LHS of a ## operator, and CurToken is the ##
@@ -824,3 +824,8 @@ void TokenLexer::updateLocForMacroArgTokens(SourceLocation ArgIdSpellLoc,
     updateConsecutiveMacroArgTokens(SM, InstLoc, begin_tokens, end_tokens);
   }
 }
+
+void TokenLexer::PropagateLineStartLeadingSpaceInfo(Token &Result) {
+  AtStartOfLine = Result.isAtStartOfLine();
+  HasLeadingSpace = Result.hasLeadingSpace();
+}
diff --git a/test/Preprocessor/hash_line.c b/test/Preprocessor/hash_line.c
index 64edae0..c4de9f0 100644
--- a/test/Preprocessor/hash_line.c
+++ b/test/Preprocessor/hash_line.c
@@ -1,11 +1,12 @@
 // The 1 and # should not go on the same line.
 // RUN: %clang_cc1 -E %s | FileCheck --strict-whitespace %s
-// CHECK-NOT: 1{{.*}}#
 // CHECK: {{^1$}}
-// CHECK-NOT: 1{{.*}}#
-// CHECK: {{^      #$}}
-// CHECK-NOT: 1{{.*}}#
-1
+// CHECK-NEXT: {{^      #$}}
+// CHECK-NEXT: {{^2$}}
+// CHECK-NEXT: {{^           #$}}
 #define EMPTY
+#define IDENTITY(X) X
+1
 EMPTY #
-
+2
+IDENTITY() #
diff --git a/test/Preprocessor/macro_expand_empty.c b/test/Preprocessor/macro_expand_empty.c
index e69de29..3fb6394 100644
--- a/test/Preprocessor/macro_expand_empty.c
+++ b/test/Preprocessor/macro_expand_empty.c
@@ -0,0 +1,14 @@
+// RUN: %clang_cc1 -fsyntax-only %s
+// Check that this doesn't crash
+
+#define IDENTITY1(x) x
+#define IDENTITY2(x) IDENTITY1(x) IDENTITY1(x) IDENTITY1(x) IDENTITY1(x)
+#define IDENTITY3(x) IDENTITY2(x) IDENTITY2(x) IDENTITY2(x) IDENTITY2(x)
+#define IDENTITY4(x) IDENTITY3(x) IDENTITY3(x) IDENTITY3(x) IDENTITY3(x)
+#define IDENTITY5(x) IDENTITY4(x) IDENTITY4(x) IDENTITY4(x) IDENTITY4(x)
+#define IDENTITY6(x) IDENTITY5(x) IDENTITY5(x) IDENTITY5(x) IDENTITY5(x)
+#define IDENTITY7(x) IDENTITY6(x) IDENTITY6(x) IDENTITY6(x) IDENTITY6(x)
+#define IDENTITY8(x) IDENTITY7(x) IDENTITY7(x) IDENTITY7(x) IDENTITY7(x)
+#define IDENTITY9(x) IDENTITY8(x) IDENTITY8(x) IDENTITY8(x) IDENTITY8(x)
+#define IDENTITY0(x) IDENTITY9(x) IDENTITY9(x) IDENTITY9(x) IDENTITY9(x)
+IDENTITY0()
diff --git a/test/SemaCXX/warn-empty-body.cpp b/test/SemaCXX/warn-empty-body.cpp
index d643ced..d3aaac1 100644
--- a/test/SemaCXX/warn-empty-body.cpp
+++ b/test/SemaCXX/warn-empty-body.cpp
@@ -269,3 +269,8 @@ void test_template_inst(int x) {
   test_template<double>(x);
 }
 
+#define IDENTITY(a) a
+void test7(int x, int y) {
+  if (x) IDENTITY(); // no-warning
+}
+

