[llvm] f552474 - Fix bugs in EOL marking in command line tokenizers

Reid Kleckner via llvm-commits llvm-commits at lists.llvm.org
Thu Nov 5 13:04:13 PST 2020


Author: Reid Kleckner
Date: 2020-11-05T13:01:32-08:00
New Revision: f55247456e219bb64521c3a73c618267d5bf671c

URL: https://github.com/llvm/llvm-project/commit/f55247456e219bb64521c3a73c618267d5bf671c
DIFF: https://github.com/llvm/llvm-project/commit/f55247456e219bb64521c3a73c618267d5bf671c.diff

LOG: Fix bugs in EOL marking in command line tokenizers

Add unit tests for this behavior, since the integration test for
clang-cl did not catch these bugs.

Fixes PR47604

Differential Revision: https://reviews.llvm.org/D90866

Added: 
    

Modified: 
    llvm/lib/Support/CommandLine.cpp
    llvm/unittests/Support/CommandLineTest.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Support/CommandLine.cpp b/llvm/lib/Support/CommandLine.cpp
index e53421a277f1..a185863fddb9 100644
--- a/llvm/lib/Support/CommandLine.cpp
+++ b/llvm/lib/Support/CommandLine.cpp
@@ -832,7 +832,7 @@ void cl::TokenizeGNUCommandLine(StringRef Src, StringSaver &Saver,
     // Consume runs of whitespace.
     if (Token.empty()) {
       while (I != E && isWhitespace(Src[I])) {
-        // Mark the end of lines in response files
+        // Mark the end of lines in response files.
         if (MarkEOLs && Src[I] == '\n')
           NewArgv.push_back(nullptr);
         ++I;
@@ -869,6 +869,9 @@ void cl::TokenizeGNUCommandLine(StringRef Src, StringSaver &Saver,
     if (isWhitespace(C)) {
       if (!Token.empty())
         NewArgv.push_back(Saver.save(StringRef(Token)).data());
+      // Mark the end of lines in response files.
+      if (MarkEOLs && C == '\n')
+        NewArgv.push_back(nullptr);
       Token.clear();
       continue;
     }
@@ -880,9 +883,6 @@ void cl::TokenizeGNUCommandLine(StringRef Src, StringSaver &Saver,
   // Append the last token after hitting EOF with no whitespace.
   if (!Token.empty())
     NewArgv.push_back(Saver.save(StringRef(Token)).data());
-  // Mark the end of response files
-  if (MarkEOLs)
-    NewArgv.push_back(nullptr);
 }
 
 /// Backslashes are interpreted in a rather complicated way in the Windows-style
@@ -956,11 +956,11 @@ tokenizeWindowsCommandLineImpl(StringRef Src, StringSaver &Saver,
         ++I;
       StringRef NormalChars = Src.slice(Start, I);
       if (I >= E || isWhitespaceOrNull(Src[I])) {
-        if (I < E && Src[I] == '\n')
-          MarkEOL();
         // No special characters: slice out the substring and start the next
         // token. Copy the string if the caller asks us to.
         AddToken(AlwaysCopy ? Saver.save(NormalChars) : NormalChars);
+        if (I < E && Src[I] == '\n')
+          MarkEOL();
       } else if (Src[I] == '\"') {
         Token += NormalChars;
         State = QUOTED;

diff  --git a/llvm/unittests/Support/CommandLineTest.cpp b/llvm/unittests/Support/CommandLineTest.cpp
index c02e9e59a5e0..a05f3894ef05 100644
--- a/llvm/unittests/Support/CommandLineTest.cpp
+++ b/llvm/unittests/Support/CommandLineTest.cpp
@@ -199,14 +199,15 @@ typedef void ParserFunction(StringRef Source, StringSaver &Saver,
                             bool MarkEOLs);
 
 void testCommandLineTokenizer(ParserFunction *parse, StringRef Input,
-                              const char *const Output[], size_t OutputSize) {
+                              ArrayRef<const char *> Output,
+                              bool MarkEOLs = false) {
   SmallVector<const char *, 0> Actual;
   BumpPtrAllocator A;
   StringSaver Saver(A);
-  parse(Input, Saver, Actual, /*MarkEOLs=*/false);
-  EXPECT_EQ(OutputSize, Actual.size());
+  parse(Input, Saver, Actual, MarkEOLs);
+  EXPECT_EQ(Output.size(), Actual.size());
   for (unsigned I = 0, E = Actual.size(); I != E; ++I) {
-    if (I < OutputSize) {
+    if (I < Output.size()) {
       EXPECT_STREQ(Output[I], Actual[I]);
     }
   }
@@ -219,8 +220,7 @@ TEST(CommandLineTest, TokenizeGNUCommandLine) {
   const char *const Output[] = {
       "foo bar",     "foo bar",   "foo bar",          "foo\\bar",
       "-DFOO=bar()", "foobarbaz", "C:\\src\\foo.cpp", "C:srcfoo.cpp"};
-  testCommandLineTokenizer(cl::TokenizeGNUCommandLine, Input, Output,
-                           array_lengthof(Output));
+  testCommandLineTokenizer(cl::TokenizeGNUCommandLine, Input, Output);
 }
 
 TEST(CommandLineTest, TokenizeWindowsCommandLine1) {
@@ -228,75 +228,85 @@ TEST(CommandLineTest, TokenizeWindowsCommandLine1) {
       R"(a\b c\\d e\\"f g" h\"i j\\\"k "lmn" o pqr "st \"u" \v)";
   const char *const Output[] = { "a\\b", "c\\\\d", "e\\f g", "h\"i", "j\\\"k",
                                  "lmn", "o", "pqr", "st \"u", "\\v" };
-  testCommandLineTokenizer(cl::TokenizeWindowsCommandLine, Input, Output,
-                           array_lengthof(Output));
+  testCommandLineTokenizer(cl::TokenizeWindowsCommandLine, Input, Output);
 }
 
 TEST(CommandLineTest, TokenizeWindowsCommandLine2) {
   const char Input[] = "clang -c -DFOO=\"\"\"ABC\"\"\" x.cpp";
   const char *const Output[] = { "clang", "-c", "-DFOO=\"ABC\"", "x.cpp"};
-  testCommandLineTokenizer(cl::TokenizeWindowsCommandLine, Input, Output,
-                           array_lengthof(Output));
+  testCommandLineTokenizer(cl::TokenizeWindowsCommandLine, Input, Output);
 }
 
 TEST(CommandLineTest, TokenizeWindowsCommandLineQuotedLastArgument) {
   const char Input1[] = R"(a b c d "")";
   const char *const Output1[] = {"a", "b", "c", "d", ""};
-  testCommandLineTokenizer(cl::TokenizeWindowsCommandLine, Input1, Output1,
-                           array_lengthof(Output1));
+  testCommandLineTokenizer(cl::TokenizeWindowsCommandLine, Input1, Output1);
   const char Input2[] = R"(a b c d ")";
   const char *const Output2[] = {"a", "b", "c", "d"};
-  testCommandLineTokenizer(cl::TokenizeWindowsCommandLine, Input2, Output2,
-                           array_lengthof(Output2));
+  testCommandLineTokenizer(cl::TokenizeWindowsCommandLine, Input2, Output2);
+}
+
+TEST(CommandLineTest, TokenizeAndMarkEOLs) {
+  // Clang uses EOL marking in response files to support options that consume
+  // the rest of the arguments on the current line, but do not consume arguments
+  // from subsequent lines. For example, given these rsp files contents:
+  // /c /Zi /O2
+  // /Oy- /link /debug /opt:ref
+  // /Zc:ThreadsafeStatics-
+  //
+  // clang-cl needs to treat "/debug /opt:ref" as linker flags, and everything
+  // else as compiler flags. The tokenizer inserts nullptr sentinels into the
+  // output so that clang-cl can find the end of the current line.
+  const char Input[] = "clang -Xclang foo\n\nfoo\"bar\"baz\n x.cpp\n";
+  const char *const Output[] = {"clang", "-Xclang", "foo",
+                                nullptr, nullptr,   "foobarbaz",
+                                nullptr, "x.cpp",   nullptr};
+  testCommandLineTokenizer(cl::TokenizeWindowsCommandLine, Input, Output,
+                           /*MarkEOLs=*/true);
+  testCommandLineTokenizer(cl::TokenizeGNUCommandLine, Input, Output,
+                           /*MarkEOLs=*/true);
 }
 
 TEST(CommandLineTest, TokenizeConfigFile1) {
   const char *Input = "\\";
   const char *const Output[] = { "\\" };
-  testCommandLineTokenizer(cl::tokenizeConfigFile, Input, Output,
-                           array_lengthof(Output));
+  testCommandLineTokenizer(cl::tokenizeConfigFile, Input, Output);
 }
 
 TEST(CommandLineTest, TokenizeConfigFile2) {
   const char *Input = "\\abc";
   const char *const Output[] = { "abc" };
-  testCommandLineTokenizer(cl::tokenizeConfigFile, Input, Output,
-                           array_lengthof(Output));
+  testCommandLineTokenizer(cl::tokenizeConfigFile, Input, Output);
 }
 
 TEST(CommandLineTest, TokenizeConfigFile3) {
   const char *Input = "abc\\";
   const char *const Output[] = { "abc\\" };
-  testCommandLineTokenizer(cl::tokenizeConfigFile, Input, Output,
-                           array_lengthof(Output));
+  testCommandLineTokenizer(cl::tokenizeConfigFile, Input, Output);
 }
 
 TEST(CommandLineTest, TokenizeConfigFile4) {
   const char *Input = "abc\\\n123";
   const char *const Output[] = { "abc123" };
-  testCommandLineTokenizer(cl::tokenizeConfigFile, Input, Output,
-                           array_lengthof(Output));
+  testCommandLineTokenizer(cl::tokenizeConfigFile, Input, Output);
 }
 
 TEST(CommandLineTest, TokenizeConfigFile5) {
   const char *Input = "abc\\\r\n123";
   const char *const Output[] = { "abc123" };
-  testCommandLineTokenizer(cl::tokenizeConfigFile, Input, Output,
-                           array_lengthof(Output));
+  testCommandLineTokenizer(cl::tokenizeConfigFile, Input, Output);
 }
 
 TEST(CommandLineTest, TokenizeConfigFile6) {
   const char *Input = "abc\\\n";
   const char *const Output[] = { "abc" };
-  testCommandLineTokenizer(cl::tokenizeConfigFile, Input, Output,
-                           array_lengthof(Output));
+  testCommandLineTokenizer(cl::tokenizeConfigFile, Input, Output);
 }
 
 TEST(CommandLineTest, TokenizeConfigFile7) {
   const char *Input = "abc\\\r\n";
   const char *const Output[] = { "abc" };
-  testCommandLineTokenizer(cl::tokenizeConfigFile, Input, Output,
-                           array_lengthof(Output));
+  testCommandLineTokenizer(cl::tokenizeConfigFile, Input, Output);
 }
 
 TEST(CommandLineTest, TokenizeConfigFile8) {
@@ -318,15 +328,13 @@ TEST(CommandLineTest, TokenizeConfigFile9) {
 TEST(CommandLineTest, TokenizeConfigFile10) {
   const char *Input = "\\\nabc";
   const char *const Output[] = { "abc" };
-  testCommandLineTokenizer(cl::tokenizeConfigFile, Input, Output,
-                           array_lengthof(Output));
+  testCommandLineTokenizer(cl::tokenizeConfigFile, Input, Output);
 }
 
 TEST(CommandLineTest, TokenizeConfigFile11) {
   const char *Input = "\\\r\nabc";
   const char *const Output[] = { "abc" };
-  testCommandLineTokenizer(cl::tokenizeConfigFile, Input, Output,
-                           array_lengthof(Output));
+  testCommandLineTokenizer(cl::tokenizeConfigFile, Input, Output);
 }
 
 TEST(CommandLineTest, AliasesWithArguments) {
@@ -962,6 +970,34 @@ TEST(CommandLineTest, ResponseFileRelativePath) {
               testing::Pointwise(StringEquality(), {"test/test", "-flag"}));
 }
 
+TEST(CommandLineTest, ResponseFileEOLs) {
+  vfs::InMemoryFileSystem FS;
+#ifdef _WIN32
+  const char *TestRoot = "C:\\";
+#else
+  const char *TestRoot = "//net";
+#endif
+  FS.setCurrentWorkingDirectory(TestRoot);
+  FS.addFile("eols.rsp", 0,
+             MemoryBuffer::getMemBuffer("-Xclang -Wno-whatever\n input.cpp"));
+  SmallVector<const char *, 2> Argv = {"clang", "@eols.rsp"};
+  BumpPtrAllocator A;
+  StringSaver Saver(A);
+  ASSERT_TRUE(cl::ExpandResponseFiles(Saver, cl::TokenizeWindowsCommandLine,
+                                      Argv, true, true, FS,
+                                      /*CurrentDir=*/StringRef(TestRoot)));
+  const char *Expected[] = {"clang", "-Xclang", "-Wno-whatever", nullptr,
+                            "input.cpp"};
+  ASSERT_EQ(array_lengthof(Expected), Argv.size());
+  for (size_t I = 0, E = array_lengthof(Expected); I < E; ++I) {
+    if (Expected[I] == nullptr) {
+      ASSERT_EQ(Argv[I], nullptr);
+    } else {
+      ASSERT_STREQ(Expected[I], Argv[I]);
+    }
+  }
+}
+
 TEST(CommandLineTest, SetDefautValue) {
   cl::ResetCommandLineParser();
 


        


More information about the llvm-commits mailing list