[cfe-commits] Improve performance of SkipBCPLComment()

Wed Aug 18 10:25:21 PDT 2010

I've optimized SkipBCPLComment() to scan ahead to the first newline
character and then backtrack to check whether that newline is escaped.

Peter
-------------- next part --------------
Index: lib/Lex/Lexer.cpp
===================================================================

--- lib/Lex/Lexer.cpp	(revision 111366)
+++ lib/Lex/Lexer.cpp	(working copy)
@@ -1107,6 +1107,13 @@
   return false;
 }
 
+// SkipBCPLComment helper: true if "//" follows any horizontal whitespace at Ptr.
+static inline bool nextLineIsComment(const char * Ptr) {
+    while (isHorizontalWhitespace(*Ptr))  // advance past leading blanks
+        ++Ptr;
+    return Ptr[0] == '/' && Ptr[1] == '/';  // Ptr[1] is read only if Ptr[0] is '/'
+} 
+
 // SkipBCPLComment - We have just read the // characters from input.  Skip until
 // we find the newline character thats terminate the comment.  Then update
 /// BufferPtr and return.
@@ -1128,66 +1135,38 @@
   // the comment contains normal ascii characters with nothing interesting in
   // them.  As such, optimize for this case with the inner loop.
   char C;
-  do {
-    C = *CurPtr;
-    // FIXME: Speedup BCPL comment lexing.  Just scan for a \n or \r character.
-    // If we find a \n character, scan backwards, checking to see if it's an
-    // escaped newline, like we do for block comments.
-
-    // Skip over characters in the fast loop.
-    while (C != 0 &&                // Potentially EOF.
-           C != '\\' &&             // Potentially escaped newline.
-           C != '?' &&              // Potentially trigraph.
-           C != '\n' && C != '\r')  // Newline or DOS-style newline.
+  while (1) {
+    C = *CurPtr;    
+    while (C != 0 && C != '\n' && C != '\r')
       C = *++CurPtr;
-
-    // If this is a newline, we're done.
-    if (C == '\n' || C == '\r')
-      break;  // Found the newline? Break out!
-
-    // Otherwise, this is a hard case.  Fall back on getAndAdvanceChar to
-    // properly decode the character.  Read it in raw mode to avoid emitting
-    // diagnostics about things like trigraphs.  If we see an escaped newline,
-    // we'll handle it below.
-    const char *OldPtr = CurPtr;
-    bool OldRawMode = isLexingRawMode();
-    LexingRawMode = true;
-    C = getAndAdvanceChar(CurPtr, Result);
-    LexingRawMode = OldRawMode;
-
-    // If the char that we finally got was a \n, then we must have had something
-    // like \<newline><newline>.  We don't want to have consumed the second
-    // newline, we want CurPtr, to end up pointing to it down below.
-    if (C == '\n' || C == '\r') {
-      --CurPtr;
-      C = 'x'; // doesn't matter what this is.
+    
+    if (C == 0) {
+      assert(CurPtr == BufferEnd);
+      break; // Comment terminated by EOF
     }
-
-    // If we read multiple characters, and one of those characters was a \r or
-    // \n, then we had an escaped newline within the comment.  Emit diagnostic
-    // unless the next line is also a // comment.
-    if (CurPtr != OldPtr+1 && C != '/' && CurPtr[0] != '/') {
-      for (; OldPtr != CurPtr; ++OldPtr)
-        if (OldPtr[0] == '\n' || OldPtr[0] == '\r') {
-          // Okay, we found a // comment that ends in a newline, if the next
-          // line is also a // comment, but has spaces, don't emit a diagnostic.
-          if (isspace(C)) {
-            const char *ForwardPtr = CurPtr;
-            while (isspace(*ForwardPtr))  // Skip whitespace.
-              ++ForwardPtr;
-            if (ForwardPtr[0] == '/' && ForwardPtr[1] == '/')
-              break;
-          }
-
-          if (!isLexingRawMode())
-            Diag(OldPtr-1, diag::ext_multi_line_bcpl_comment);
-          break;
-        }
+    
+    const char * PrevPtr = CurPtr;
+    
+    // Find the last non-whitespace character
+    do {
+      C = *--PrevPtr;
+    } while (isHorizontalWhitespace(C));
+    
+    if (C == '\\' ||
+        (C == '/' && PrevPtr[-1] == '?' && PrevPtr[-2] == '?'
+        && this->getFeatures().Trigraphs)) {
+      if (CurPtr[0] == '\r' && CurPtr[1] == '\n' ||
+          (CurPtr[0] == '\n' && CurPtr[1] == '\r'))
+        ++CurPtr; // Move to end of 2-char newline
+      ++CurPtr; // Move past newline
+      if (!isLexingRawMode() && !nextLineIsComment(CurPtr))
+        Diag(PrevPtr, diag::ext_multi_line_bcpl_comment);
+      Result.setFlag(Token::NeedsCleaning);
+      continue;
     }
+    break;
+  }
 
-    if (CurPtr == BufferEnd+1) { --CurPtr; break; }
-  } while (C != '\n' && C != '\r');
-
   // Found but did not consume the newline.  Notify comment handlers about the
   // comment unless we're in a #if 0 block.
   if (PP && !isLexingRawMode() &&