[llvm] f552474 - Fix bugs in EOL marking in command line tokenizers
Reid Kleckner via llvm-commits
llvm-commits at lists.llvm.org
Thu Nov 5 13:04:13 PST 2020
Author: Reid Kleckner
Date: 2020-11-05T13:01:32-08:00
New Revision: f55247456e219bb64521c3a73c618267d5bf671c
URL: https://github.com/llvm/llvm-project/commit/f55247456e219bb64521c3a73c618267d5bf671c
DIFF: https://github.com/llvm/llvm-project/commit/f55247456e219bb64521c3a73c618267d5bf671c.diff
LOG: Fix bugs in EOL marking in command line tokenizers
Add unit tests for this behavior, since the integration test for
clang-cl did not catch these bugs.
Fixes PR47604
Differential Revision: https://reviews.llvm.org/D90866
Added:
Modified:
llvm/lib/Support/CommandLine.cpp
llvm/unittests/Support/CommandLineTest.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Support/CommandLine.cpp b/llvm/lib/Support/CommandLine.cpp
index e53421a277f1..a185863fddb9 100644
--- a/llvm/lib/Support/CommandLine.cpp
+++ b/llvm/lib/Support/CommandLine.cpp
@@ -832,7 +832,7 @@ void cl::TokenizeGNUCommandLine(StringRef Src, StringSaver &Saver,
// Consume runs of whitespace.
if (Token.empty()) {
while (I != E && isWhitespace(Src[I])) {
- // Mark the end of lines in response files
+ // Mark the end of lines in response files.
if (MarkEOLs && Src[I] == '\n')
NewArgv.push_back(nullptr);
++I;
@@ -869,6 +869,9 @@ void cl::TokenizeGNUCommandLine(StringRef Src, StringSaver &Saver,
if (isWhitespace(C)) {
if (!Token.empty())
NewArgv.push_back(Saver.save(StringRef(Token)).data());
+ // Mark the end of lines in response files.
+ if (MarkEOLs && C == '\n')
+ NewArgv.push_back(nullptr);
Token.clear();
continue;
}
@@ -880,9 +883,6 @@ void cl::TokenizeGNUCommandLine(StringRef Src, StringSaver &Saver,
// Append the last token after hitting EOF with no whitespace.
if (!Token.empty())
NewArgv.push_back(Saver.save(StringRef(Token)).data());
- // Mark the end of response files
- if (MarkEOLs)
- NewArgv.push_back(nullptr);
}
/// Backslashes are interpreted in a rather complicated way in the Windows-style
@@ -956,11 +956,11 @@ tokenizeWindowsCommandLineImpl(StringRef Src, StringSaver &Saver,
++I;
StringRef NormalChars = Src.slice(Start, I);
if (I >= E || isWhitespaceOrNull(Src[I])) {
- if (I < E && Src[I] == '\n')
- MarkEOL();
// No special characters: slice out the substring and start the next
// token. Copy the string if the caller asks us to.
AddToken(AlwaysCopy ? Saver.save(NormalChars) : NormalChars);
+ if (I < E && Src[I] == '\n')
+ MarkEOL();
} else if (Src[I] == '\"') {
Token += NormalChars;
State = QUOTED;
diff --git a/llvm/unittests/Support/CommandLineTest.cpp b/llvm/unittests/Support/CommandLineTest.cpp
index c02e9e59a5e0..a05f3894ef05 100644
--- a/llvm/unittests/Support/CommandLineTest.cpp
+++ b/llvm/unittests/Support/CommandLineTest.cpp
@@ -199,14 +199,15 @@ typedef void ParserFunction(StringRef Source, StringSaver &Saver,
bool MarkEOLs);
void testCommandLineTokenizer(ParserFunction *parse, StringRef Input,
- const char *const Output[], size_t OutputSize) {
+ ArrayRef<const char *> Output,
+ bool MarkEOLs = false) {
SmallVector<const char *, 0> Actual;
BumpPtrAllocator A;
StringSaver Saver(A);
- parse(Input, Saver, Actual, /*MarkEOLs=*/false);
- EXPECT_EQ(OutputSize, Actual.size());
+ parse(Input, Saver, Actual, MarkEOLs);
+ EXPECT_EQ(Output.size(), Actual.size());
for (unsigned I = 0, E = Actual.size(); I != E; ++I) {
- if (I < OutputSize) {
+ if (I < Output.size()) {
EXPECT_STREQ(Output[I], Actual[I]);
}
}
@@ -219,8 +220,7 @@ TEST(CommandLineTest, TokenizeGNUCommandLine) {
const char *const Output[] = {
"foo bar", "foo bar", "foo bar", "foo\\bar",
"-DFOO=bar()", "foobarbaz", "C:\\src\\foo.cpp", "C:srcfoo.cpp"};
- testCommandLineTokenizer(cl::TokenizeGNUCommandLine, Input, Output,
- array_lengthof(Output));
+ testCommandLineTokenizer(cl::TokenizeGNUCommandLine, Input, Output);
}
TEST(CommandLineTest, TokenizeWindowsCommandLine1) {
@@ -228,75 +228,85 @@ TEST(CommandLineTest, TokenizeWindowsCommandLine1) {
R"(a\b c\\d e\\"f g" h\"i j\\\"k "lmn" o pqr "st \"u" \v)";
const char *const Output[] = { "a\\b", "c\\\\d", "e\\f g", "h\"i", "j\\\"k",
"lmn", "o", "pqr", "st \"u", "\\v" };
- testCommandLineTokenizer(cl::TokenizeWindowsCommandLine, Input, Output,
- array_lengthof(Output));
+ testCommandLineTokenizer(cl::TokenizeWindowsCommandLine, Input, Output);
}
TEST(CommandLineTest, TokenizeWindowsCommandLine2) {
const char Input[] = "clang -c -DFOO=\"\"\"ABC\"\"\" x.cpp";
const char *const Output[] = { "clang", "-c", "-DFOO=\"ABC\"", "x.cpp"};
- testCommandLineTokenizer(cl::TokenizeWindowsCommandLine, Input, Output,
- array_lengthof(Output));
+ testCommandLineTokenizer(cl::TokenizeWindowsCommandLine, Input, Output);
}
TEST(CommandLineTest, TokenizeWindowsCommandLineQuotedLastArgument) {
const char Input1[] = R"(a b c d "")";
const char *const Output1[] = {"a", "b", "c", "d", ""};
- testCommandLineTokenizer(cl::TokenizeWindowsCommandLine, Input1, Output1,
- array_lengthof(Output1));
+ testCommandLineTokenizer(cl::TokenizeWindowsCommandLine, Input1, Output1);
const char Input2[] = R"(a b c d ")";
const char *const Output2[] = {"a", "b", "c", "d"};
- testCommandLineTokenizer(cl::TokenizeWindowsCommandLine, Input2, Output2,
- array_lengthof(Output2));
+ testCommandLineTokenizer(cl::TokenizeWindowsCommandLine, Input2, Output2);
+}
+
+TEST(CommandLineTest, TokenizeAndMarkEOLs) {
+ // Clang uses EOL marking in response files to support options that consume
+ // the rest of the arguments on the current line, but do not consume arguments
+ // from subsequent lines. For example, given the following rsp file contents:
+ // /c /Zi /O2
+ // /Oy- /link /debug /opt:ref
+ // /Zc:ThreadsafeStatics-
+ //
+ // clang-cl needs to treat "/debug /opt:ref" as linker flags, and everything
+ // else as compiler flags. The tokenizer inserts nullptr sentinels into the
+ // output so that clang-cl can find the end of the current line.
+ const char Input[] = "clang -Xclang foo\n\nfoo\"bar\"baz\n x.cpp\n";
+ const char *const Output[] = {"clang", "-Xclang", "foo",
+ nullptr, nullptr, "foobarbaz",
+ nullptr, "x.cpp", nullptr};
+ testCommandLineTokenizer(cl::TokenizeWindowsCommandLine, Input, Output,
+ /*MarkEOLs=*/true);
+ testCommandLineTokenizer(cl::TokenizeGNUCommandLine, Input, Output,
+ /*MarkEOLs=*/true);
}
TEST(CommandLineTest, TokenizeConfigFile1) {
const char *Input = "\\";
const char *const Output[] = { "\\" };
- testCommandLineTokenizer(cl::tokenizeConfigFile, Input, Output,
- array_lengthof(Output));
+ testCommandLineTokenizer(cl::tokenizeConfigFile, Input, Output);
}
TEST(CommandLineTest, TokenizeConfigFile2) {
const char *Input = "\\abc";
const char *const Output[] = { "abc" };
- testCommandLineTokenizer(cl::tokenizeConfigFile, Input, Output,
- array_lengthof(Output));
+ testCommandLineTokenizer(cl::tokenizeConfigFile, Input, Output);
}
TEST(CommandLineTest, TokenizeConfigFile3) {
const char *Input = "abc\\";
const char *const Output[] = { "abc\\" };
- testCommandLineTokenizer(cl::tokenizeConfigFile, Input, Output,
- array_lengthof(Output));
+ testCommandLineTokenizer(cl::tokenizeConfigFile, Input, Output);
}
TEST(CommandLineTest, TokenizeConfigFile4) {
const char *Input = "abc\\\n123";
const char *const Output[] = { "abc123" };
- testCommandLineTokenizer(cl::tokenizeConfigFile, Input, Output,
- array_lengthof(Output));
+ testCommandLineTokenizer(cl::tokenizeConfigFile, Input, Output);
}
TEST(CommandLineTest, TokenizeConfigFile5) {
const char *Input = "abc\\\r\n123";
const char *const Output[] = { "abc123" };
- testCommandLineTokenizer(cl::tokenizeConfigFile, Input, Output,
- array_lengthof(Output));
+ testCommandLineTokenizer(cl::tokenizeConfigFile, Input, Output);
}
TEST(CommandLineTest, TokenizeConfigFile6) {
const char *Input = "abc\\\n";
const char *const Output[] = { "abc" };
- testCommandLineTokenizer(cl::tokenizeConfigFile, Input, Output,
- array_lengthof(Output));
+ testCommandLineTokenizer(cl::tokenizeConfigFile, Input, Output);
}
TEST(CommandLineTest, TokenizeConfigFile7) {
const char *Input = "abc\\\r\n";
const char *const Output[] = { "abc" };
- testCommandLineTokenizer(cl::tokenizeConfigFile, Input, Output,
- array_lengthof(Output));
+ testCommandLineTokenizer(cl::tokenizeConfigFile, Input, Output);
}
TEST(CommandLineTest, TokenizeConfigFile8) {
@@ -318,15 +328,13 @@ TEST(CommandLineTest, TokenizeConfigFile9) {
TEST(CommandLineTest, TokenizeConfigFile10) {
const char *Input = "\\\nabc";
const char *const Output[] = { "abc" };
- testCommandLineTokenizer(cl::tokenizeConfigFile, Input, Output,
- array_lengthof(Output));
+ testCommandLineTokenizer(cl::tokenizeConfigFile, Input, Output);
}
TEST(CommandLineTest, TokenizeConfigFile11) {
const char *Input = "\\\r\nabc";
const char *const Output[] = { "abc" };
- testCommandLineTokenizer(cl::tokenizeConfigFile, Input, Output,
- array_lengthof(Output));
+ testCommandLineTokenizer(cl::tokenizeConfigFile, Input, Output);
}
TEST(CommandLineTest, AliasesWithArguments) {
@@ -962,6 +970,34 @@ TEST(CommandLineTest, ResponseFileRelativePath) {
testing::Pointwise(StringEquality(), {"test/test", "-flag"}));
}
+TEST(CommandLineTest, ResponseFileEOLs) {
+ vfs::InMemoryFileSystem FS;
+#ifdef _WIN32
+ const char *TestRoot = "C:\\";
+#else
+ const char *TestRoot = "//net";
+#endif
+ FS.setCurrentWorkingDirectory(TestRoot);
+ FS.addFile("eols.rsp", 0,
+ MemoryBuffer::getMemBuffer("-Xclang -Wno-whatever\n input.cpp"));
+ SmallVector<const char *, 2> Argv = {"clang", "@eols.rsp"};
+ BumpPtrAllocator A;
+ StringSaver Saver(A);
+ ASSERT_TRUE(cl::ExpandResponseFiles(Saver, cl::TokenizeWindowsCommandLine,
+ Argv, true, true, FS,
+ /*CurrentDir=*/StringRef(TestRoot)));
+ const char *Expected[] = {"clang", "-Xclang", "-Wno-whatever", nullptr,
+ "input.cpp"};
+ ASSERT_EQ(array_lengthof(Expected), Argv.size());
+ for (size_t I = 0, E = array_lengthof(Expected); I < E; ++I) {
+ if (Expected[I] == nullptr) {
+ ASSERT_EQ(Argv[I], nullptr);
+ } else {
+ ASSERT_STREQ(Expected[I], Argv[I]);
+ }
+ }
+}
+
TEST(CommandLineTest, SetDefautValue) {
cl::ResetCommandLineParser();
More information about the llvm-commits mailing list