[llvm] r321479 - Improve performance TokenizeWindowsCommandLine
Rui Ueyama via llvm-commits
llvm-commits at lists.llvm.org
Wed Dec 27 00:59:53 PST 2017
Author: ruiu
Date: Wed Dec 27 00:59:52 2017
New Revision: 321479
URL: http://llvm.org/viewvc/llvm-project?rev=321479&view=rev
Log:
Improve performance of TokenizeWindowsCommandLine
Patch by Takuto Ikuta.
This patch reduces the lld link time of Chromium's blink_core.dll in a
component build.
The total size of the input arguments in .directives becomes nearly 300MB in
that build, and the many calls to strchr and assert become a bottleneck.
On my desktop machine, the link times measured over four runs are shown below.
The patch improves link time by around 10%.
This patch
TotalSeconds : 13.4918885
TotalSeconds : 13.9474257
TotalSeconds : 13.4941082
TotalSeconds : 13.6077962
Avg : 13.63530465
master
TotalSeconds : 15.6938531
TotalSeconds : 15.7022508
TotalSeconds : 15.9567202
TotalSeconds : 14.5851505
Avg : 15.48449365
Differential Revision: https://reviews.llvm.org/D41590
Modified:
llvm/trunk/lib/Support/CommandLine.cpp
Modified: llvm/trunk/lib/Support/CommandLine.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Support/CommandLine.cpp?rev=321479&r1=321478&r2=321479&view=diff
==============================================================================
--- llvm/trunk/lib/Support/CommandLine.cpp (original)
+++ llvm/trunk/lib/Support/CommandLine.cpp Wed Dec 27 00:59:52 2017
@@ -688,7 +688,9 @@ static bool EatsUnboundedNumberOfValues(
O->getNumOccurrencesFlag() == cl::OneOrMore;
}
-static bool isWhitespace(char C) { return strchr(" \t\n\r\f\v", C); }
+static bool isWhitespace(char C) {
+ return C == ' ' || C == '\t' || C == '\r' || C == '\n';
+}
static bool isQuote(char C) { return C == '\"' || C == '\''; }
@@ -709,17 +711,19 @@ void cl::TokenizeGNUCommandLine(StringRe
break;
}
+ char C = Src[I];
+
// Backslash escapes the next character.
- if (I + 1 < E && Src[I] == '\\') {
+ if (I + 1 < E && C == '\\') {
++I; // Skip the escape.
Token.push_back(Src[I]);
continue;
}
// Consume a quoted string.
- if (isQuote(Src[I])) {
- char Quote = Src[I++];
- while (I != E && Src[I] != Quote) {
+ if (isQuote(C)) {
+ ++I;
+ while (I != E && Src[I] != C) {
// Backslash escapes the next character.
if (Src[I] == '\\' && I + 1 != E)
++I;
@@ -732,7 +736,7 @@ void cl::TokenizeGNUCommandLine(StringRe
}
// End the token if this is whitespace.
- if (isWhitespace(Src[I])) {
+ if (isWhitespace(C)) {
if (!Token.empty())
NewArgv.push_back(Saver.save(StringRef(Token)).data());
Token.clear();
@@ -740,7 +744,7 @@ void cl::TokenizeGNUCommandLine(StringRe
}
// This is a normal character. Append it.
- Token.push_back(Src[I]);
+ Token.push_back(C);
}
// Append the last token after hitting EOF with no whitespace.
@@ -798,25 +802,27 @@ void cl::TokenizeWindowsCommandLine(Stri
// end of the source string.
enum { INIT, UNQUOTED, QUOTED } State = INIT;
for (size_t I = 0, E = Src.size(); I != E; ++I) {
+ char C = Src[I];
+
// INIT state indicates that the current input index is at the start of
// the string or between tokens.
if (State == INIT) {
- if (isWhitespace(Src[I])) {
+ if (isWhitespace(C)) {
// Mark the end of lines in response files
- if (MarkEOLs && Src[I] == '\n')
+ if (MarkEOLs && C == '\n')
NewArgv.push_back(nullptr);
continue;
}
- if (Src[I] == '"') {
+ if (C == '"') {
State = QUOTED;
continue;
}
- if (Src[I] == '\\') {
+ if (C == '\\') {
I = parseBackslash(Src, I, Token);
State = UNQUOTED;
continue;
}
- Token.push_back(Src[I]);
+ Token.push_back(C);
State = UNQUOTED;
continue;
}
@@ -825,38 +831,38 @@ void cl::TokenizeWindowsCommandLine(Stri
// quotes.
if (State == UNQUOTED) {
// Whitespace means the end of the token.
- if (isWhitespace(Src[I])) {
+ if (isWhitespace(C)) {
NewArgv.push_back(Saver.save(StringRef(Token)).data());
Token.clear();
State = INIT;
// Mark the end of lines in response files
- if (MarkEOLs && Src[I] == '\n')
+ if (MarkEOLs && C == '\n')
NewArgv.push_back(nullptr);
continue;
}
- if (Src[I] == '"') {
+ if (C == '"') {
State = QUOTED;
continue;
}
- if (Src[I] == '\\') {
+ if (C == '\\') {
I = parseBackslash(Src, I, Token);
continue;
}
- Token.push_back(Src[I]);
+ Token.push_back(C);
continue;
}
// QUOTED state means that it's reading a token quoted by double quotes.
if (State == QUOTED) {
- if (Src[I] == '"') {
+ if (C == '"') {
State = UNQUOTED;
continue;
}
- if (Src[I] == '\\') {
+ if (C == '\\') {
I = parseBackslash(Src, I, Token);
continue;
}
- Token.push_back(Src[I]);
+ Token.push_back(C);
}
}
// Append the last token after hitting EOF with no whitespace.
More information about the llvm-commits
mailing list