[cfe-commits] r64480 - in /cfe/trunk: lib/Lex/Preprocessor.cpp lib/Rewrite/HTMLRewrite.cpp test/Misc/emit-html.c

Chris Lattner sabre at nondot.org
Fri Feb 13 11:33:24 PST 2009


Author: lattner
Date: Fri Feb 13 13:33:24 2009
New Revision: 64480

URL: http://llvm.org/viewvc/llvm-project?rev=64480&view=rev
Log:
Fix rdar://6562329, a static analyzer crash Ted noticed on 
wine sources.  This was happening because HighlightMacros was 
calling EnterMainFile multiple times on the same preprocessor
object and getting an assert due to the new #line stuff (the
file in question was bison output with #line directives).

The fix for this is to not reenter the file.  Instead, 
re-lex the tokens in raw mode, swizzle them a bit and re-preprocess
the token stream.  An added bonus of this is that rewrite macros
will now highlight the macro definition as well as its uses.  Woo.


Added:
    cfe/trunk/test/Misc/emit-html.c
Modified:
    cfe/trunk/lib/Lex/Preprocessor.cpp
    cfe/trunk/lib/Rewrite/HTMLRewrite.cpp

Modified: cfe/trunk/lib/Lex/Preprocessor.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Lex/Preprocessor.cpp?rev=64480&r1=64479&r2=64480&view=diff

==============================================================================
--- cfe/trunk/lib/Lex/Preprocessor.cpp (original)
+++ cfe/trunk/lib/Lex/Preprocessor.cpp Fri Feb 13 13:33:24 2009
@@ -626,7 +626,10 @@
 /// EnterMainSourceFile - Enter the specified FileID as the main source file,
 /// which implicitly adds the builtin defines etc.
 void Preprocessor::EnterMainSourceFile() {
-  
+  // We do not allow the preprocessor to reenter the main file.  Doing so will
+  // cause FileID's to accumulate information from both runs (e.g. #line
+  // information) and predefined macros aren't guaranteed to be set properly.
+  assert(NumEnteredSourceFiles == 0 && "Cannot reenter the main file!");
   FileID MainFileID = SourceMgr.getMainFileID();
   
   // Enter the main file source buffer.

Modified: cfe/trunk/lib/Rewrite/HTMLRewrite.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Rewrite/HTMLRewrite.cpp?rev=64480&r1=64479&r2=64480&view=diff

==============================================================================
--- cfe/trunk/lib/Rewrite/HTMLRewrite.cpp (original)
+++ cfe/trunk/lib/Rewrite/HTMLRewrite.cpp Fri Feb 13 13:33:24 2009
@@ -344,8 +344,8 @@
 void html::SyntaxHighlight(Rewriter &R, FileID FID, Preprocessor &PP) {
   RewriteBuffer &RB = R.getEditBuffer(FID);
 
-  const SourceManager &SourceMgr = PP.getSourceManager();
-  Lexer L(FID, SourceMgr, PP.getLangOptions());
+  const SourceManager &SM = PP.getSourceManager();
+  Lexer L(FID, SM, PP.getLangOptions());
   const char *BufferStart = L.getBufferStart();
   
   // Inform the preprocessor that we want to retain comments as tokens, so we 
@@ -360,7 +360,7 @@
   while (Tok.isNot(tok::eof)) {
     // Since we are lexing unexpanded tokens, all tokens are from the main
     // FileID.
-    unsigned TokOffs = SourceMgr.getFileOffset(Tok.getLocation());
+    unsigned TokOffs = SM.getFileOffset(Tok.getLocation());
     unsigned TokLen = Tok.getLength();
     switch (Tok.getKind()) {
     default: break;
@@ -398,7 +398,7 @@
       unsigned TokEnd = TokOffs+TokLen;
       L.LexFromRawLexer(Tok);
       while (!Tok.isAtStartOfLine() && Tok.isNot(tok::eof)) {
-        TokEnd = SourceMgr.getFileOffset(Tok.getLocation())+Tok.getLength();
+        TokEnd = SM.getFileOffset(Tok.getLocation())+Tok.getLength();
         L.LexFromRawLexer(Tok);
       }
       
@@ -416,23 +416,55 @@
 }
 
 /// HighlightMacros - This uses the macro table state from the end of the
-/// file, to reexpand macros and insert (into the HTML) information about the
+/// file, to re-expand macros and insert (into the HTML) information about the
 /// macro expansions.  This won't be perfectly perfect, but it will be
 /// reasonably close.
 void html::HighlightMacros(Rewriter &R, FileID FID, Preprocessor& PP) {
   
   RewriteBuffer &RB = R.getEditBuffer(FID);
   
+  // Re-lex the raw token stream into a token buffer.
+  const SourceManager &SM = PP.getSourceManager();
+  std::vector<Token> TokenStream;
+  
+  Lexer L(FID, SM, PP.getLangOptions());
+  
+  // Lex all the tokens in raw mode, to avoid entering #includes or expanding
+  // macros.
+  while (1) {
+    Token Tok;
+    L.LexFromRawLexer(Tok);
+    
+    // If this is a # at the start of a line, discard it from the token stream.
+    // We don't want the re-preprocess step to see #defines, #includes or other
+    // preprocessor directives.
+    if (Tok.is(tok::hash) && Tok.isAtStartOfLine())
+      continue;
+    
+    // If this raw token is an identifier, the raw lexer won't have looked up
+    // the corresponding identifier info for it.  Do this now so that it will be
+    // macro expanded when we re-preprocess it.
+    if (Tok.is(tok::identifier)) {
+      // Change the kind of this identifier to the appropriate token kind, e.g.
+      // turning "for" into a keyword.
+      Tok.setKind(PP.LookUpIdentifierInfo(Tok)->getTokenID());
+    }    
+      
+    TokenStream.push_back(Tok);
+    
+    if (Tok.is(tok::eof)) break;
+  }
+  
   // Inform the preprocessor that we don't want comments.
   PP.SetCommentRetentionState(false, false);
-  
-  // Start parsing the specified input file.
-  PP.EnterMainSourceFile();
+
+  // Enter the tokens we just lexed.  This will cause them to be macro expanded
+  // but won't enter sub-files (because we removed #'s).
+  PP.EnterTokenStream(&TokenStream[0], TokenStream.size(), false, false);
   
   TokenConcatenation ConcatInfo(PP);
   
   // Lex all the tokens.
-  const SourceManager &SourceMgr = PP.getSourceManager();
   Token Tok;
   PP.Lex(Tok);
   while (Tok.isNot(tok::eof)) {
@@ -443,8 +475,8 @@
     }
     
     // Ignore tokens whose instantiation location was not the main file.
-    SourceLocation LLoc = SourceMgr.getInstantiationLoc(Tok.getLocation());
-    std::pair<FileID, unsigned> LLocInfo = SourceMgr.getDecomposedLoc(LLoc);
+    SourceLocation LLoc = SM.getInstantiationLoc(Tok.getLocation());
+    std::pair<FileID, unsigned> LLocInfo = SM.getDecomposedLoc(LLoc);
     
     if (LLocInfo.first != FID) {
       PP.Lex(Tok);
@@ -457,7 +489,7 @@
     // Get the size of current macro call itself.
     // FIXME: This should highlight the args of a function-like
     // macro, using a heuristic.
-    unsigned TokLen = Lexer::MeasureTokenLength(LLoc, SourceMgr);
+    unsigned TokLen = Lexer::MeasureTokenLength(LLoc, SM);
     
     unsigned TokOffs = LLocInfo.second;
     // Highlight the macro invocation itself.
@@ -476,7 +508,7 @@
     // instantiation.  It would be really nice to pop up a window with all the
     // spelling of the tokens or something.
     while (!Tok.is(tok::eof) &&
-           SourceMgr.getInstantiationLoc(Tok.getLocation()) == LLoc) {
+           SM.getInstantiationLoc(Tok.getLocation()) == LLoc) {
       // Insert a newline if the macro expansion is getting large.
       if (LineLen > 60) {
         Expansion += "<br>";

Added: cfe/trunk/test/Misc/emit-html.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Misc/emit-html.c?rev=64480&view=auto

==============================================================================
--- cfe/trunk/test/Misc/emit-html.c (added)
+++ cfe/trunk/test/Misc/emit-html.c Fri Feb 13 13:33:24 2009
@@ -0,0 +1,5 @@
+// RUN: clang %s -emit-html -o -
+
+// rdar://6562329
+#line 42 "foo.c"
+





More information about the cfe-commits mailing list