[cfe-commits] Improve performance of SkipBCPLComment()
Peter Davies
ultratwo at gmail.com
Wed Aug 18 10:25:21 PDT 2010
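I've optimized SkipBCPLComment() to scan forward to the first newline character
and then backtrack to check whether that newline is escaped (by a backslash or
the ??/ trigraph), instead of decoding the comment character by character.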
Peter
-------------- next part --------------
Index: lib/Lex/Lexer.cpp
===================================================================
--- lib/Lex/Lexer.cpp (revision 111366)
+++ lib/Lex/Lexer.cpp (working copy)
@@ -1107,6 +1107,13 @@
return false;
}
+// Helper for SkipBCPLComment: skip horizontal whitespace, then test for "//".
+static inline bool nextLineIsComment(const char * Ptr) {
+ while (isHorizontalWhitespace(*Ptr))
+ ++Ptr;
+ return Ptr[0] == '/' && Ptr[1] == '/';
+}
+
// SkipBCPLComment - We have just read the // characters from input. Skip until
// we find the newline character thats terminate the comment. Then update
/// BufferPtr and return.
@@ -1128,66 +1135,38 @@
// the comment contains normal ascii characters with nothing interesting in
// them. As such, optimize for this case with the inner loop.
char C;
- do {
- C = *CurPtr;
- // FIXME: Speedup BCPL comment lexing. Just scan for a \n or \r character.
- // If we find a \n character, scan backwards, checking to see if it's an
- // escaped newline, like we do for block comments.
-
- // Skip over characters in the fast loop.
- while (C != 0 && // Potentially EOF.
- C != '\\' && // Potentially escaped newline.
- C != '?' && // Potentially trigraph.
- C != '\n' && C != '\r') // Newline or DOS-style newline.
+ while (1) {
+ C = *CurPtr;
+ while (C != 0 && C != '\n' && C != '\r')
C = *++CurPtr;
-
- // If this is a newline, we're done.
- if (C == '\n' || C == '\r')
- break; // Found the newline? Break out!
-
- // Otherwise, this is a hard case. Fall back on getAndAdvanceChar to
- // properly decode the character. Read it in raw mode to avoid emitting
- // diagnostics about things like trigraphs. If we see an escaped newline,
- // we'll handle it below.
- const char *OldPtr = CurPtr;
- bool OldRawMode = isLexingRawMode();
- LexingRawMode = true;
- C = getAndAdvanceChar(CurPtr, Result);
- LexingRawMode = OldRawMode;
-
- // If the char that we finally got was a \n, then we must have had something
- // like \<newline><newline>. We don't want to have consumed the second
- // newline, we want CurPtr, to end up pointing to it down below.
- if (C == '\n' || C == '\r') {
- --CurPtr;
- C = 'x'; // doesn't matter what this is.
+
+ if (C == 0) {
+ assert(CurPtr == BufferEnd);
+ break; // Comment terminated by EOF
}
-
- // If we read multiple characters, and one of those characters was a \r or
- // \n, then we had an escaped newline within the comment. Emit diagnostic
- // unless the next line is also a // comment.
- if (CurPtr != OldPtr+1 && C != '/' && CurPtr[0] != '/') {
- for (; OldPtr != CurPtr; ++OldPtr)
- if (OldPtr[0] == '\n' || OldPtr[0] == '\r') {
- // Okay, we found a // comment that ends in a newline, if the next
- // line is also a // comment, but has spaces, don't emit a diagnostic.
- if (isspace(C)) {
- const char *ForwardPtr = CurPtr;
- while (isspace(*ForwardPtr)) // Skip whitespace.
- ++ForwardPtr;
- if (ForwardPtr[0] == '/' && ForwardPtr[1] == '/')
- break;
- }
-
- if (!isLexingRawMode())
- Diag(OldPtr-1, diag::ext_multi_line_bcpl_comment);
- break;
- }
+
+ const char * PrevPtr = CurPtr;
+
+ // Find the last non-whitespace character
+ do {
+ C = *--PrevPtr;
+ } while (isHorizontalWhitespace(C));
+
+ if (C == '\\' ||
+ (C == '/' && PrevPtr[-1] == '?' && PrevPtr[-2] == '?'
+ && this->getFeatures().Trigraphs)) {
+ if (CurPtr[0] == '\r' && CurPtr[1] == '\n' ||
+ (CurPtr[0] == '\n' && CurPtr[1] == '\r'))
+ ++CurPtr; // Move to end of 2-char newline
+ ++CurPtr; // Move past newline
+ if (!isLexingRawMode() && !nextLineIsComment(CurPtr))
+ Diag(PrevPtr, diag::ext_multi_line_bcpl_comment);
+ Result.setFlag(Token::NeedsCleaning);
+ continue;
}
+ break;
+ }
- if (CurPtr == BufferEnd+1) { --CurPtr; break; }
- } while (C != '\n' && C != '\r');
-
// Found but did not consume the newline. Notify comment handlers about the
// comment unless we're in a #if 0 block.
if (PP && !isLexingRawMode() &&
More information about the cfe-commits
mailing list