[llvm] r321479 - Improve performance TokenizeWindowsCommandLine

Wed Dec 27 00:59:53 PST 2017

Author: ruiu
Date: Wed Dec 27 00:59:52 2017
New Revision: 321479

URL: http://llvm.org/viewvc/llvm-project?rev=321479&view=rev
Log:
Improve performance TokenizeWindowsCommandLine

Patcy by Takuto Ikuta.

This patch reduces lld link time of chromium's blink_core.dll in
component build.

Total size of input argument in .directives become nearly 300MB in the
build and calling many strchr and assert becomes bottleneck.

On my desktop machine, 4 times stats of the link time are like below.
Improved around 10%.

This patch
TotalSeconds : 13.4918885
TotalSeconds : 13.9474257
TotalSeconds : 13.4941082
TotalSeconds : 13.6077962
Avg : 13.63530465

master
TotalSeconds : 15.6938531
TotalSeconds : 15.7022508
TotalSeconds : 15.9567202
TotalSeconds : 14.5851505
Avg : 15.48449365

Differential Revision: https://reviews.llvm.org/D41590

Modified:
    llvm/trunk/lib/Support/CommandLine.cpp

Modified: llvm/trunk/lib/Support/CommandLine.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Support/CommandLine.cpp?rev=321479&r1=321478&r2=321479&view=diff
==============================================================================

--- llvm/trunk/lib/Support/CommandLine.cpp (original)
+++ llvm/trunk/lib/Support/CommandLine.cpp Wed Dec 27 00:59:52 2017
@@ -688,7 +688,9 @@ static bool EatsUnboundedNumberOfValues(
          O->getNumOccurrencesFlag() == cl::OneOrMore;
 }
 
-static bool isWhitespace(char C) { return strchr(" \t\n\r\f\v", C); }
+static bool isWhitespace(char C) {
+  return C == ' ' || C == '\t' || C == '\r' || C == '\n';
+}
 
 static bool isQuote(char C) { return C == '\"' || C == '\''; }
 
@@ -709,17 +711,19 @@ void cl::TokenizeGNUCommandLine(StringRe
         break;
     }
 
+    char C = Src[I];
+
     // Backslash escapes the next character.
-    if (I + 1 < E && Src[I] == '\\') {
+    if (I + 1 < E && C == '\\') {
       ++I; // Skip the escape.
       Token.push_back(Src[I]);
       continue;
     }
 
     // Consume a quoted string.
-    if (isQuote(Src[I])) {
-      char Quote = Src[I++];
-      while (I != E && Src[I] != Quote) {
+    if (isQuote(C)) {
+      ++I;
+      while (I != E && Src[I] != C) {
         // Backslash escapes the next character.
         if (Src[I] == '\\' && I + 1 != E)
           ++I;
@@ -732,7 +736,7 @@ void cl::TokenizeGNUCommandLine(StringRe
     }
 
     // End the token if this is whitespace.
-    if (isWhitespace(Src[I])) {
+    if (isWhitespace(C)) {
       if (!Token.empty())
         NewArgv.push_back(Saver.save(StringRef(Token)).data());
       Token.clear();
@@ -740,7 +744,7 @@ void cl::TokenizeGNUCommandLine(StringRe
     }
 
     // This is a normal character.  Append it.
-    Token.push_back(Src[I]);
+    Token.push_back(C);
   }
 
   // Append the last token after hitting EOF with no whitespace.
@@ -798,25 +802,27 @@ void cl::TokenizeWindowsCommandLine(Stri
   // end of the source string.
   enum { INIT, UNQUOTED, QUOTED } State = INIT;
   for (size_t I = 0, E = Src.size(); I != E; ++I) {
+    char C = Src[I];
+
     // INIT state indicates that the current input index is at the start of
     // the string or between tokens.
     if (State == INIT) {
-      if (isWhitespace(Src[I])) {
+      if (isWhitespace(C)) {
         // Mark the end of lines in response files
-        if (MarkEOLs && Src[I] == '\n')
+        if (MarkEOLs && C == '\n')
           NewArgv.push_back(nullptr);
         continue;
       }
-      if (Src[I] == '"') {
+      if (C == '"') {
         State = QUOTED;
         continue;
       }
-      if (Src[I] == '\\') {
+      if (C == '\\') {
         I = parseBackslash(Src, I, Token);
         State = UNQUOTED;
         continue;
       }
-      Token.push_back(Src[I]);
+      Token.push_back(C);
       State = UNQUOTED;
       continue;
     }
@@ -825,38 +831,38 @@ void cl::TokenizeWindowsCommandLine(Stri
     // quotes.
     if (State == UNQUOTED) {
       // Whitespace means the end of the token.
-      if (isWhitespace(Src[I])) {
+      if (isWhitespace(C)) {
         NewArgv.push_back(Saver.save(StringRef(Token)).data());
         Token.clear();
         State = INIT;
         // Mark the end of lines in response files
-        if (MarkEOLs && Src[I] == '\n')
+        if (MarkEOLs && C == '\n')
           NewArgv.push_back(nullptr);
         continue;
       }
-      if (Src[I] == '"') {
+      if (C == '"') {
         State = QUOTED;
         continue;
       }
-      if (Src[I] == '\\') {
+      if (C == '\\') {
         I = parseBackslash(Src, I, Token);
         continue;
       }
-      Token.push_back(Src[I]);
+      Token.push_back(C);
       continue;
     }
 
     // QUOTED state means that it's reading a token quoted by double quotes.
     if (State == QUOTED) {
-      if (Src[I] == '"') {
+      if (C == '"') {
         State = UNQUOTED;
         continue;
       }
-      if (Src[I] == '\\') {
+      if (C == '\\') {
         I = parseBackslash(Src, I, Token);
         continue;
       }
-      Token.push_back(Src[I]);
+      Token.push_back(C);
     }
   }
   // Append the last token after hitting EOF with no whitespace.