[cfe-commits] r38767 - in /cfe/cfe/trunk: Driver/PrintPreprocessedOutput.cpp README.txt

Wed Jul 11 09:24:27 PDT 2007

Author: sabre
Date: Wed Jul 11 11:24:27 2007
New Revision: 38767

URL: http://llvm.org/viewvc/llvm-project?rev=38767&view=rev
Log:
Implement paste avoidance, implementing Preprocessor/output_paste_avoid.c

Modified:
    cfe/cfe/trunk/Driver/PrintPreprocessedOutput.cpp
    cfe/cfe/trunk/README.txt

Modified: cfe/cfe/trunk/Driver/PrintPreprocessedOutput.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/cfe/trunk/Driver/PrintPreprocessedOutput.cpp?rev=38767&r1=38766&r2=38767&view=diff

==============================================================================

--- cfe/cfe/trunk/Driver/PrintPreprocessedOutput.cpp (original)
+++ cfe/cfe/trunk/Driver/PrintPreprocessedOutput.cpp Wed Jul 11 11:24:27 2007
@@ -266,12 +266,101 @@
 };
 } // end anonymous namespace
 
+/// AvoidConcat - If printing PrevTok immediately followed by Tok would cause
+/// the two individual tokens to be lexed as a single token, return true (which
+/// causes a space to be printed between them).  This allows the output of -E
+/// mode to be lexed to the same token stream as lexing the input directly
+/// would.
+///
+/// This code must conservatively return true if it doesn't want to be 100%
+/// accurate.  This will cause the output to include extra space characters, but
+/// the resulting output won't have incorrect concatenations going on.  Examples
+/// include "..", which we print with a space between, because we don't want to
+/// track enough to tell "x.." from "...".
+static bool AvoidConcat(const LexerToken &PrevTok, const LexerToken &Tok,
+                        Preprocessor &PP) {
+  char Buffer[256];
+  
+  // If we haven't emitted a token on this line yet, PrevTok isn't useful to
+  // look at and no concatenation could happen anyway.
+  if (!EmodeEmittedTokensOnThisLine)
+    return false;
+
+  // Basic algorithm: we look at the first character of the second token, and
+  // determine whether it, if appended to the first token, would form (or would
+  // contribute) to a larger token if concatenated.
+  char FirstChar;
+  if (IdentifierInfo *II = Tok.getIdentifierInfo()) {
+    // Avoid spelling identifiers, the most common form of token.
+    FirstChar = II->getName()[0];
+  } else if (Tok.getLength() < 256) {
+    const char *TokPtr = Buffer;
+    unsigned Len = PP.getSpelling(Tok, TokPtr);
+    FirstChar = TokPtr[0];
+  } else {
+    FirstChar = PP.getSpelling(Tok)[0];
+  }
+  
+  tok::TokenKind PrevKind = PrevTok.getKind();
+  if (PrevTok.getIdentifierInfo())  // Language keyword or named operator.
+    PrevKind = tok::identifier;
+  
+  switch (PrevKind) {
+  default: return false;
+  case tok::identifier:   // id+id or id+number or id+L"foo".
+    return isalnum(FirstChar) || FirstChar == '_';
+  case tok::numeric_constant:
+    return isalnum(FirstChar) || Tok.getKind() == tok::numeric_constant ||
+           FirstChar == '+' || FirstChar == '-' || FirstChar == '.';
+  case tok::period:          // ..., .*, .1234
+    return FirstChar == '.' || FirstChar == '*' || isdigit(FirstChar);
+  case tok::amp:             // &&, &=
+    return FirstChar == '&' || FirstChar == '=';
+  case tok::plus:            // ++, +=
+    return FirstChar == '+' || FirstChar == '=';
+  case tok::minus:           // --, ->, -=, ->*
+    return FirstChar == '-' || FirstChar == '>' || FirstChar == '=';
+  case tok::slash:           // /=, /*, //
+    return FirstChar == '=' || FirstChar == '*' || FirstChar == '/';
+  case tok::less:            // <<, <<=, <=, <?=, <?, <:, <%
+    return FirstChar == '<' || FirstChar == '?' || FirstChar == '=' ||
+           FirstChar == ':' || FirstChar == '%';
+  case tok::greater:         // >>, >=, >>=, >?=, >?, ->*
+    return FirstChar == '>' || FirstChar == '?' || FirstChar == '=' || 
+           FirstChar == '*';
+  case tok::pipe:            // ||, |=
+    return FirstChar == '|' || FirstChar == '=';
+  case tok::percent:         // %=, %>, %:
+    return FirstChar == '=' || FirstChar == '>' || FirstChar == ':';
+  case tok::colon:           // ::, :>
+    return FirstChar == ':' || FirstChar == '>';
+  case tok::hash:            // ##, #@, %:%:
+    return FirstChar == '#' || FirstChar == '@' || FirstChar == '%';
+  case tok::question:        // <?=, >?=, ??x -> trigraphs.
+    // Have to check for <?= in case <? is disabled.
+    return FirstChar == '?' || FirstChar == '=';
+  case tok::arrow:           // ->*
+    return FirstChar == '*';
+    
+  case tok::star:            // *=
+  case tok::exclaim:         // !=
+  case tok::lessless:        // <<=
+  case tok::greaterequal:    // >>=
+  case tok::caret:           // ^=
+  case tok::equal:           // ==
+  case tok::lessquestion:    // <?=
+  case tok::greaterquestion: // >?=
+    // Cases that concatenate only if the next char is =.
+    return FirstChar == '=';
+  }
+}
+
 /// DoPrintPreprocessedInput - This implements -E mode.
 ///
 void clang::DoPrintPreprocessedInput(Preprocessor &PP) {
   InitOutputBuffer();
   
-  LexerToken Tok;
+  LexerToken Tok, PrevTok;
   char Buffer[256];
   EModeCurLine = 0;
   EModeCurFilename = "\"<uninit>\"";
@@ -283,12 +372,15 @@
   PP.AddPragmaHandler(0, new UnknownPragmaHandler("#pragma"));
   PP.AddPragmaHandler("GCC", new UnknownPragmaHandler("#pragma GCC"));
   do {
+    PrevTok = Tok;
     PP.Lex(Tok);
     
     // If this token is at the start of a line, emit newlines if needed.
     if (Tok.isAtStartOfLine()) {
       HandleFirstTokOnLine(Tok, PP);
-    } else if (Tok.hasLeadingSpace()) {
+    } else if (Tok.hasLeadingSpace() || 
+               // Don't print "-" next to "-", it would form "--".
+               AvoidConcat(PrevTok, Tok, PP)) {
       OutputChar(' ');
     }
     

Modified: cfe/cfe/trunk/README.txt
URL: http://llvm.org/viewvc/llvm-project/cfe/cfe/trunk/README.txt?rev=38767&r1=38766&r2=38767&view=diff

==============================================================================
--- cfe/cfe/trunk/README.txt (original)
+++ cfe/cfe/trunk/README.txt Wed Jul 11 11:24:27 2007
@@ -66,7 +66,6 @@
 
 Preprocessor:
  * Comma swallowing GNU token-pasting extension.
- * Paste avoidance in -E output mode.
  * #line / #file directives
  * -C output mode in -E mode.
  * MSExtension: "L#param" stringizes to a wide string literal.