[cfe-commits] r38646 - in /cfe/cfe/trunk: Lex/Lexer.cpp Lex/Pragma.cpp Lex/Preprocessor.cpp include/clang/Lex/Lexer.h include/clang/Lex/MultipleIncludeOpt.h include/clang/Lex/Preprocessor.h

Wed Jul 11 09:23:29 PDT 2007

Author: sabre
Date: Wed Jul 11 11:23:29 2007
New Revision: 38646

URL: http://llvm.org/viewvc/llvm-project?rev=38646&view=rev
Log:
Implement the automaton for recognizing files with controlling macros.

Added:
    cfe/cfe/trunk/include/clang/Lex/MultipleIncludeOpt.h   (with props)
Modified:
    cfe/cfe/trunk/Lex/Lexer.cpp
    cfe/cfe/trunk/Lex/Pragma.cpp
    cfe/cfe/trunk/Lex/Preprocessor.cpp
    cfe/cfe/trunk/include/clang/Lex/Lexer.h
    cfe/cfe/trunk/include/clang/Lex/Preprocessor.h

Modified: cfe/cfe/trunk/Lex/Lexer.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/cfe/trunk/Lex/Lexer.cpp?rev=38646&r1=38645&r2=38646&view=diff

==============================================================================

--- cfe/cfe/trunk/Lex/Lexer.cpp (original)
+++ cfe/cfe/trunk/Lex/Lexer.cpp Wed Jul 11 11:23:29 2007
@@ -979,6 +979,8 @@
     goto LexNextToken;   // GCC isn't tail call eliminating.
 
   case 'L':
+    // Notify MIOpt that we read a non-whitespace/non-comment token.
+    MIOpt.ReadToken();
     Char = getCharAndSize(CurPtr, SizeTmp);
 
     // Wide string literal.
@@ -1000,20 +1002,28 @@
   case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u':
   case 'v': case 'w': case 'x': case 'y': case 'z':
   case '_':
+    // Notify MIOpt that we read a non-whitespace/non-comment token.
+    MIOpt.ReadToken();
     return LexIdentifier(Result, CurPtr);
     
   // C99 6.4.4.1: Integer Constants.
   // C99 6.4.4.2: Floating Constants.
   case '0': case '1': case '2': case '3': case '4':
   case '5': case '6': case '7': case '8': case '9':
+    // Notify MIOpt that we read a non-whitespace/non-comment token.
+    MIOpt.ReadToken();
     return LexNumericConstant(Result, CurPtr);
     
   // C99 6.4.4: Character Constants.
   case '\'':
+    // Notify MIOpt that we read a non-whitespace/non-comment token.
+    MIOpt.ReadToken();
     return LexCharConstant(Result, CurPtr);
 
   // C99 6.4.5: String Literals.
   case '"':
+    // Notify MIOpt that we read a non-whitespace/non-comment token.
+    MIOpt.ReadToken();
     return LexStringLiteral(Result, CurPtr);
 
   // C99 6.4.6: Punctuators.
@@ -1041,6 +1051,9 @@
   case '.':
     Char = getCharAndSize(CurPtr, SizeTmp);
     if (Char >= '0' && Char <= '9') {
+      // Notify MIOpt that we read a non-whitespace/non-comment token.
+      MIOpt.ReadToken();
+
       return LexNumericConstant(Result, ConsumeChar(CurPtr, SizeTmp, Result));
     } else if (Features.CPlusPlus && Char == '*') {
       Result.SetKind(tok::periodstar);
@@ -1333,6 +1346,8 @@
       break;
     } else if (CurPtr[-1] == '$' && Features.DollarIdents) {// $ in identifiers.
       Diag(CurPtr-1, diag::ext_dollar_in_identifier);
+      // Notify MIOpt that we read a non-whitespace/non-comment token.
+      MIOpt.ReadToken();
       return LexIdentifier(Result, CurPtr);
     }
     
@@ -1341,6 +1356,9 @@
     goto LexNextToken;   // GCC isn't tail call eliminating.
   }
   
+  // Notify MIOpt that we read a non-whitespace/non-comment token.
+  MIOpt.ReadToken();
+
   // Update the location of token as well as BufferPtr.
   FormTokenWithChars(Result, CurPtr);
 }

Modified: cfe/cfe/trunk/Lex/Pragma.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/cfe/trunk/Lex/Pragma.cpp?rev=38646&r1=38645&r2=38646&view=diff

==============================================================================
--- cfe/cfe/trunk/Lex/Pragma.cpp (original)
+++ cfe/cfe/trunk/Lex/Pragma.cpp Wed Jul 11 11:23:29 2007
@@ -74,6 +74,9 @@
 void Preprocessor::HandlePragmaDirective() {
   ++NumPragma;
   
+  // Inform MIOpt that we found a side-effect of parsing this file.
+  CurLexer->MIOpt.ReadDirective();
+  
   // Invoke the first level of pragma handlers which reads the namespace id.
   LexerToken Tok;
   PragmaHandlers->HandlePragma(*this, Tok);

Modified: cfe/cfe/trunk/Lex/Preprocessor.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/cfe/trunk/Lex/Preprocessor.cpp?rev=38646&r1=38645&r2=38646&view=diff

==============================================================================
--- cfe/cfe/trunk/Lex/Preprocessor.cpp (original)
+++ cfe/cfe/trunk/Lex/Preprocessor.cpp Wed Jul 11 11:23:29 2007
@@ -739,6 +739,14 @@
     return;
   }
   
+  // See if this file had a controlling macro.
+  if (CurLexer) {  // Not ending a macro...
+    if (const IdentifierTokenInfo *ControllingMacro = 
+          CurLexer->MIOpt.GetControllingMacroAtEndOfFile()) {
+      ;
+    }
+  }
+  
   // If this is a #include'd file, pop it off the include stack and continue
   // lexing the #includer file.
   if (!IncludeMacroStack.empty()) {
@@ -1049,6 +1057,11 @@
   
   ++NumDirectives;
   
+  // We are about to read a token.  For the multiple-include optimization FA to
+  // work, we have to remember if we had read any tokens *before* this 
+  // pp-directive.
+  bool ReadAnyTokensBeforeDirective = CurLexer->MIOpt.getHasReadAnyTokensVal();
+  
   // Read the next token, the directive flavor.
   LexUnexpandedToken(Result);
   
@@ -1059,6 +1072,7 @@
 
 #if 0
   case tok::numeric_constant:
+    MIOpt.ReadDirective();
     // FIXME: implement # 7 line numbers!
     break;
 #endif
@@ -1073,7 +1087,7 @@
     switch (Result.getIdentifierInfo()->getNameLength()) {
     case 4:
       if (Directive[0] == 'l' && !strcmp(Directive, "line"))
-        ;  // FIXME: implement #line
+        CurLexer->MIOpt.ReadDirective();  // FIXME: implement #line
       if (Directive[0] == 'e' && !strcmp(Directive, "elif"))
         return HandleElifDirective(Result);
       if (Directive[0] == 's' && !strcmp(Directive, "sccs"))
@@ -1083,7 +1097,7 @@
       if (Directive[0] == 'e' && !strcmp(Directive, "endif"))
         return HandleEndifDirective(Result);
       if (Directive[0] == 'i' && !strcmp(Directive, "ifdef"))
-        return HandleIfdefDirective(Result, false);
+        return HandleIfdefDirective(Result, false, true/*not valid for miopt*/);
       if (Directive[0] == 'u' && !strcmp(Directive, "undef"))
         return HandleUndefDirective(Result);
       if (Directive[0] == 'e' && !strcmp(Directive, "error"))
@@ -1095,7 +1109,7 @@
       if (Directive[0] == 'd' && !strcmp(Directive, "define"))
         return HandleDefineDirective(Result);
       if (Directive[0] == 'i' && !strcmp(Directive, "ifndef"))
-        return HandleIfdefDirective(Result, true);
+        return HandleIfdefDirective(Result, true, ReadAnyTokensBeforeDirective);
       if (Directive[0] == 'i' && !strcmp(Directive, "import"))
         return HandleImportDirective(Result);
       if (Directive[0] == 'p' && !strcmp(Directive, "pragma"))
@@ -1128,9 +1142,7 @@
   Diag(Result, diag::err_pp_invalid_directive);
   
   // Read the rest of the PP line.
-  do {
-    Lex(Result);
-  } while (Result.getKind() != tok::eom);
+  DiscardUntilEndOfDirective();
   
   // Okay, we're done parsing the directive.
 }
@@ -1151,8 +1163,13 @@
 /// HandleIdentSCCSDirective - Handle a #ident/#sccs directive.
 ///
 void Preprocessor::HandleIdentSCCSDirective(LexerToken &Tok) {
+  // Inform MIOpt that we found a side-effect of parsing this file.
+  CurLexer->MIOpt.ReadDirective();
+  
+  // Yes, this directive is an extension.
   Diag(Tok, diag::ext_pp_ident_directive);
   
+  // Read the string argument.
   LexerToken StrTok;
   Lex(StrTok);
   
@@ -1179,6 +1196,10 @@
                                           const DirectoryLookup *LookupFrom,
                                           bool isImport) {
   ++NumIncluded;
+
+  // Inform MIOpt that we found a side-effect of parsing this file.
+  CurLexer->MIOpt.ReadDirective();
+
   LexerToken FilenameTok;
   std::string Filename = CurLexer->LexIncludeFilename(FilenameTok);
   
@@ -1225,8 +1246,7 @@
   }
 
   // Look up the file, create a File ID for it.
-  unsigned FileID = 
-    SourceMgr.createFileID(File, FilenameTok.getLocation());
+  unsigned FileID = SourceMgr.createFileID(File, FilenameTok.getLocation());
   if (FileID == 0)
     return Diag(FilenameTok, diag::err_pp_file_not_found);
 
@@ -1276,6 +1296,10 @@
 ///
 void Preprocessor::HandleDefineDirective(LexerToken &DefineTok) {
   ++NumDefined;
+
+  // Inform MIOpt that we found a side-effect of parsing this file.
+  CurLexer->MIOpt.ReadDirective();
+
   LexerToken MacroNameTok;
   ReadMacroName(MacroNameTok, true);
   
@@ -1345,6 +1369,10 @@
 ///
 void Preprocessor::HandleUndefDirective(LexerToken &UndefTok) {
   ++NumUndefined;
+
+  // Inform MIOpt that we found a side-effect of parsing this file.
+  CurLexer->MIOpt.ReadDirective();
+
   LexerToken MacroNameTok;
   ReadMacroName(MacroNameTok, true);
   
@@ -1375,12 +1403,15 @@
 //===----------------------------------------------------------------------===//
 
 /// HandleIfdefDirective - Implements the #ifdef/#ifndef directive.  isIfndef is
-/// true when this is a #ifndef directive.
+/// true when this is a #ifndef directive.  ReadAnyTokensBeforeDirective is true
+/// if any tokens have been returned or pp-directives activated before this
+/// #ifndef has been lexed.
 ///
-void Preprocessor::HandleIfdefDirective(LexerToken &Result, bool isIfndef) {
+void Preprocessor::HandleIfdefDirective(LexerToken &Result, bool isIfndef,
+                                        bool ReadAnyTokensBeforeDirective) {
   ++NumIf;
   LexerToken DirectiveTok = Result;
-  
+
   LexerToken MacroNameTok;
   ReadMacroName(MacroNameTok);
   
@@ -1389,7 +1420,14 @@
     return;
   
   // Check to see if this is the last token on the #if[n]def line.
-  CheckEndOfDirective("#ifdef");
+  CheckEndOfDirective(isIfndef ? "#ifndef" : "#ifdef");
+  
+  // If this is the start of a top-level #ifndef, inform MIOpt.
+  if (!ReadAnyTokensBeforeDirective &&
+      CurLexer->getConditionalStackDepth() == 0) {
+    assert(isIfndef && "#ifdef shouldn't reach here");
+    CurLexer->MIOpt.EnterTopLevelIFNDEF(MacroNameTok.getIdentifierInfo());
+  }
   
   MacroInfo *MI = MacroNameTok.getIdentifierInfo()->getMacroInfo();
 
@@ -1413,6 +1451,11 @@
 ///
 void Preprocessor::HandleIfDirective(LexerToken &IfToken) {
   ++NumIf;
+  
+  // FIXME: Detect "#if !defined(X)" for the MIOpt.
+  CurLexer->MIOpt.ReadDirective();
+
+  // Parse and evaluate the conditional expression.
   bool ConditionalTrue = EvaluateDirectiveExpression();
   
   // Should we include the stuff contained by this directive?
@@ -1431,6 +1474,7 @@
 ///
 void Preprocessor::HandleEndifDirective(LexerToken &EndifToken) {
   ++NumEndif;
+  
   // Check that this is the whole directive.
   CheckEndOfDirective("#endif");
   
@@ -1440,6 +1484,10 @@
     return Diag(EndifToken, diag::err_pp_endif_without_if);
   }
   
+  // If this is the end of a top-level conditional, inform MIOpt.
+  if (CurLexer->getConditionalStackDepth() == 0)
+    CurLexer->MIOpt.ExitTopLevelConditional();
+  
   assert(!CondInfo.WasSkipping && !isSkipping() &&
          "This code should only be reachable in the non-skipping case!");
 }
@@ -1447,12 +1495,17 @@
 
 void Preprocessor::HandleElseDirective(LexerToken &Result) {
   ++NumElse;
+  
   // #else directive in a non-skipping conditional... start skipping.
   CheckEndOfDirective("#else");
   
   PPConditionalInfo CI;
   if (CurLexer->popConditionalLevel(CI))
     return Diag(Result, diag::pp_err_else_without_if);
+  
+  // If this is a top-level #else, inform the MIOpt.
+  if (CurLexer->getConditionalStackDepth() == 0)
+    CurLexer->MIOpt.FoundTopLevelElse();
 
   // If this is a #else with a #else before it, report the error.
   if (CI.FoundElse) Diag(Result, diag::pp_err_else_after_else);
@@ -1465,6 +1518,7 @@
 
 void Preprocessor::HandleElifDirective(LexerToken &ElifToken) {
   ++NumElse;
+  
   // #elif directive in a non-skipping conditional... start skipping.
   // We don't care what the condition is, because we will always skip it (since
   // the block immediately before it was included).
@@ -1474,6 +1528,10 @@
   if (CurLexer->popConditionalLevel(CI))
     return Diag(ElifToken, diag::pp_err_elif_without_if);
   
+  // If this is a top-level #elif, inform the MIOpt.
+  if (CurLexer->getConditionalStackDepth() == 0)
+    CurLexer->MIOpt.FoundTopLevelElse();
+  
   // If this is a #elif with a #else before it, report the error.
   if (CI.FoundElse) Diag(ElifToken, diag::pp_err_elif_after_else);
 

Modified: cfe/cfe/trunk/include/clang/Lex/Lexer.h
URL: http://llvm.org/viewvc/llvm-project/cfe/cfe/trunk/include/clang/Lex/Lexer.h?rev=38646&r1=38645&r2=38646&view=diff

==============================================================================
--- cfe/cfe/trunk/include/clang/Lex/Lexer.h (original)
+++ cfe/cfe/trunk/include/clang/Lex/Lexer.h Wed Jul 11 11:23:29 2007
@@ -15,6 +15,7 @@
 #define LLVM_CLANG_LEXER_H
 
 #include "clang/Lex/LexerToken.h"
+#include "clang/Lex/MultipleIncludeOpt.h"
 #include <string>
 #include <vector>
 
@@ -66,7 +67,11 @@
   bool ParsingFilename;          // True after #include: turn <xx> into string.
   
   // Context that changes as the file is lexed.
-    
+  
+  /// MIOpt - This is a state machine that detects the #ifndef-wrapping a file 
+  /// idiom for the multiple-include optimization.
+  MultipleIncludeOpt MIOpt;
+  
   /// ConditionalStack - Information about the set of #if/#ifdef/#ifndef blocks
   /// we are currently in.
   std::vector<PPConditionalInfo> ConditionalStack;
@@ -114,7 +119,8 @@
       IsAtStartOfLine = false;
     }
    
-    // Get a token.
+    // Get a token.  Note that this may delete the current lexer if the end of
+    // file is reached.
     LexTokenInternal(Result);
   }
   

Added: cfe/cfe/trunk/include/clang/Lex/MultipleIncludeOpt.h
URL: http://llvm.org/viewvc/llvm-project/cfe/cfe/trunk/include/clang/Lex/MultipleIncludeOpt.h?rev=38646&view=auto

==============================================================================
--- cfe/cfe/trunk/include/clang/Lex/MultipleIncludeOpt.h (added)
+++ cfe/cfe/trunk/include/clang/Lex/MultipleIncludeOpt.h Wed Jul 11 11:23:29 2007
@@ -0,0 +1,111 @@
+//===--- MultipleIncludeOpt.h - Header Multiple-Include Optzn ---*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by Chris Lattner and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//  This file defines the MultipleIncludeOpt interface.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_MULTIPLEINCLUDEOPT_H
+#define LLVM_CLANG_MULTIPLEINCLUDEOPT_H
+
+namespace llvm {
+namespace clang {
+class IdentifierTokenInfo;
+
+/// MultipleIncludeOpt - This class implements the simple state machine that the
+/// Lexer class uses to detect files subject to the 'multiple-include'
+/// optimization.  The public methods in this class are triggered by various
+/// events that occur when a file is lexed, and after the entire file is lexed,
+/// information about which macro (if any) controls the header is returned.
+class MultipleIncludeOpt {
+  /// ReadAnyTokens - This is set to false when a file is first opened and true
+  /// any time a token is returned to the client or a (non-multiple-include)
+  /// directive is parsed.  When the final #endif is parsed this is reset back
+  /// to false, that way any tokens before the first #ifndef or after the last
+  /// #endif can be easily detected.
+  bool ReadAnyTokens;
+  
+  /// TheMacro - The controlling macro for a file, if valid (null otherwise).
+  ///
+  const IdentifierTokenInfo *TheMacro;
+public:
+  MultipleIncludeOpt() : ReadAnyTokens(false), TheMacro(0) {}
+  
+  /// Invalidate - Permanently mark this file as not being suitable for the
+  /// include-file optimization.
+  void Invalidate() {
+    // If we have read tokens but have no controlling macro, the state-machine
+    // below can never "accept".
+    ReadAnyTokens = true;
+    TheMacro = 0;
+  }
+  
+  /// getHasReadAnyTokensVal - This is used for the #ifndef handshake at the
+  /// top of the file when reading preprocessor directives.  Otherwise, reading
+  /// the "ifndef x" would count as reading tokens.
+  bool getHasReadAnyTokensVal() const { return ReadAnyTokens; }
+  
+  // If a token or directive is read, remember that we have seen a side-effect
+  // in this file.
+  void ReadToken()     { ReadAnyTokens = true; }
+  void ReadDirective() { ReadAnyTokens = true; } 
+  
+  /// EnterTopLevelIFNDEF - When entering a top-level #ifndef directive (or the
+  /// "#if !defined" equivalent) without any preceding tokens, this method is
+  /// called with the identifier being tested.
+  void EnterTopLevelIFNDEF(const IdentifierTokenInfo *M) {
+    // Note, we don't care about the input value of 'ReadAnyTokens'.  The caller
+    // ensures that this is only called if there are no tokens read before the
+    // #ifndef.
+    
+    // If the macro is already set, this is after the top-level #endif.
+    if (TheMacro)
+      return Invalidate();
+    
+    // Remember that we're in the #if and that we have the macro.
+    ReadAnyTokens = true;
+    TheMacro = M;
+  }
+
+  /// FoundTopLevelElse - This is invoked when an #else/#elif directive is found
+  /// in the top level conditional in the file.
+  void FoundTopLevelElse() {
+    // If a #else directive is found at the top level, there is a chunk of the
+    // file not guarded by the controlling macro.
+    Invalidate();
+  }
+  
+  /// ExitTopLevelConditional - This method is called when the lexer exits the
+  /// top-level conditional.
+  void ExitTopLevelConditional() {
+    // If we have a macro, that means the top of the file was ok.  Without one,
+    // the conditional being closed was never recorded as a guard, so the file
+    // is permanently disqualified.
+    if (!TheMacro) return Invalidate();
+    
+    // At this point, we haven't "read any tokens" but we do have a controlling
+    // macro.  Clearing the flag lets us detect anything after the #endif.
+    ReadAnyTokens = false;
+  }
+  
+  /// GetControllingMacroAtEndOfFile - Once the entire file has been lexed, if
+  /// there is a controlling macro, return it; otherwise return null.
+  const IdentifierTokenInfo *GetControllingMacroAtEndOfFile() const {
+    // If we haven't read any tokens after the #endif, return the controlling
+    // macro if it's valid (if it isn't, it will be null).
+    if (!ReadAnyTokens)
+      return TheMacro;
+    return 0;
+  }
+};
+
+}  // end namespace clang
+}  // end namespace llvm
+
+#endif

Propchange: cfe/cfe/trunk/include/clang/Lex/MultipleIncludeOpt.h

------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: cfe/cfe/trunk/include/clang/Lex/MultipleIncludeOpt.h

------------------------------------------------------------------------------
    svn:keywords = Author Date Id Revision

Modified: cfe/cfe/trunk/include/clang/Lex/Preprocessor.h
URL: http://llvm.org/viewvc/llvm-project/cfe/cfe/trunk/include/clang/Lex/Preprocessor.h?rev=38646&r1=38645&r2=38646&view=diff

==============================================================================
--- cfe/cfe/trunk/include/clang/Lex/Preprocessor.h (original)
+++ cfe/cfe/trunk/include/clang/Lex/Preprocessor.h Wed Jul 11 11:23:29 2007
@@ -482,7 +482,8 @@
   // HandleUnassertDirective(LexerToken &Tok);
   
   // Conditional Inclusion.
-  void HandleIfdefDirective(LexerToken &Tok, bool isIfndef);
+  void HandleIfdefDirective(LexerToken &Tok, bool isIfndef,
+                            bool ReadAnyTokensBeforeDirective);
   void HandleIfDirective(LexerToken &Tok);
   void HandleEndifDirective(LexerToken &Tok);
   void HandleElseDirective(LexerToken &Tok);